/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.tools;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A heuristic number parser that tries to analyze a given String in a more
 * flexible way than the traditional Java number parsing:
 *
 * 1. All "," are replaced by "." (so "1,000" is read as 1.0, not as one thousand)
 * 2. ¼,½,¾ are replaced by 0.25, 0.5, 0.75 respectively; directly after a digit
 *    they extend that digit ("1½" becomes "1.5")
 * 3. Ranges ([number]-[number]) are matched and the arithmetic average is used as result
 * 4. All prefixes and suffixes are ignored
 *
 * Example: "Weight: 8,5 - 10,5 g" would result in 9.5
 *
 * @author Michael Wurst
 *
 */
public class NumberParser {

    /**
     * A digit-free prefix followed by "[number]-[number]", where a number is a run of
     * digits and dots. Group 1 is the lower bound, group 2 the upper bound.
     * Used with {@link Matcher#lookingAt()}, so any trailing text is ignored.
     */
    private static final Pattern RANGE_PATTERN = Pattern.compile("[^0-9]*([0-9.]+)\\s?-\\s?([0-9.]+)");

    /**
     * A digit-free prefix followed by a single run of digits and dots (group 1).
     * Used with {@link Matcher#lookingAt()}, so any trailing text is ignored.
     */
    private static final Pattern NUMBER_PATTERN = Pattern.compile("[^0-9]*([0-9.]+)");

    /**
     * Parse a number possibly surrounded by other information.
     *
     * The heuristics are tried in this order; the first one that yields a number wins:
     * plain {@link Double#parseDouble(String)}, the same after normalizing decimal commas
     * and vulgar fractions, a range (averaged), and finally the first number in the text.
     *
     * @param s the string, may be <code>null</code>
     * @return a double representation or NaN if it cannot be parsed (including <code>null</code>)
     */
    public static double parse(String s) {
        // Double.parseDouble(null) throws a NullPointerException, not a NumberFormatException
        if (s == null) {
            return Double.NaN;
        }

        try {
            return Double.parseDouble(s);
        } catch (NumberFormatException ignored) {
            // not a plain number, continue with the heuristics
        }

        // Normalize decimal commas and vulgar fractions, then retry the plain parse
        String normalized = expandFractions(s.replace(',', '.'));
        try {
            return Double.parseDouble(normalized);
        } catch (NumberFormatException ignored) {
            // still surrounded by other text, continue with the heuristics
        }

        // Try to resolve ranges: the result is the average of both bounds
        Matcher range = RANGE_PATTERN.matcher(normalized);
        if (range.lookingAt()) {
            try {
                return (Double.parseDouble(range.group(1)) + Double.parseDouble(range.group(2))) / 2;
            } catch (NumberFormatException ignored) {
                // a bound such as "1.2.3" or "." is no number, fall back to the single number heuristic
            }
        }

        // Try to ignore all prefixes and suffixes around the first number
        Matcher single = NUMBER_PATTERN.matcher(normalized);
        if (single.lookingAt()) {
            try {
                return Double.parseDouble(single.group(1));
            } catch (NumberFormatException ignored) {
                // the digit/dot run is no valid number (e.g. "1.2.3")
            }
        }

        return Double.NaN;
    }

    /**
     * Replaces the characters ¼, ½ and ¾ by their decimal representation.
     * Directly after a digit only the fractional part is inserted ("1½" becomes "1.5").
     * Otherwise a leading zero is added ("½" becomes "0.5"), because a bare ".5" following
     * other text would lose its dot to the prefix matching and be read as 5.
     */
    private static String expandFractions(String s) {
        StringBuilder result = new StringBuilder(s.length() + 8);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            String decimals;
            switch (c) {
                case '\u00BC':
                    decimals = ".25";
                    break;
                case '\u00BD':
                    decimals = ".5";
                    break;
                case '\u00BE':
                    decimals = ".75";
                    break;
                default:
                    result.append(c);
                    continue;
            }
            boolean followsDigit = i > 0 && s.charAt(i - 1) >= '0' && s.charAt(i - 1) <= '9';
            if (!followsDigit) {
                result.append('0');
            }
            result.append(decimals);
        }
        return result.toString();
    }

    /**
     * This method parses the given string as double value.
     * It first tries the normal parse method, then tests if it is the "?"
     * and would return NaN in this case.
     * Otherwise a NumberFormatException is thrown.
     *
     * @param s the string to parse
     * @return the parsed value, or NaN if the string is exactly "?"
     * @throws NumberFormatException if the string is neither a number nor "?"
     * @throws NullPointerException if the string is <code>null</code>
     *         (raised by {@link Double#parseDouble(String)})
     */
    public static double parseDouble(String s) throws NumberFormatException {
        try {
            return Double.parseDouble(s);
        } catch (NumberFormatException e) {
            // "?" is accepted as a placeholder and mapped to NaN
            if ("?".equals(s)) {
                return Double.NaN;
            }
            // rethrow the original exception so that its message names the offending input
            throw e;
        }
    }
}