package org.docx4j.model.fields;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.docx4j.Docx4jProperties;

/**
 * Extract a number from a string.
 *
 * The number can have a decimal point. Before matching, the configured
 * grouping separator is removed and the decimal symbol is converted to ".".
 *
 * @author jharrop
 */
public class NumberExtractor {

	/**
	 * Regular expression describing the numbers this class recognises.
	 * It expects "." as the decimal point; see {@link #prepare(String)}.
	 */
	static String regex =
			"([\\+-]?(\\.\\d+))|" +       // optional +/-, then decimal place, then digits
			"([\\+-]?[0](\\.\\d+))|" +    // optional +/-, then 0, then decimal place, then digits
			"([\\+-]?(\\d+)(\\.\\d*)?)";  // optional +/-, then digit, then optional (decimal place, then zero or more digits)

	/** Decimal symbol used when the constructor is given {@code null}. */
	static String decimalSymbolFallback;

	/** Grouping (thousands) separator stripped from the input before matching. */
	static String groupingSeparator;

	/** Compiled form of {@link #regex}. */
	static Pattern pattern = Pattern.compile(regex);

	static {
		// Both symbols are read once from the docx4j properties, passing "." and ","
		// respectively as the default values.
		decimalSymbolFallback = Docx4jProperties.getProperty("docx4j.Fields.Numbers.DecimalSymbol", ".");
		groupingSeparator = Docx4jProperties.getProperty("docx4j.Fields.Numbers.GroupingSeparator", ",");
	}

	/** Decimal symbol this instance converts to "." before matching. */
	private final String decimalSymbol;

	/**
	 * @param decimalSymbol the decimal symbol used in the strings to be processed;
	 *            if {@code null}, {@link #decimalSymbolFallback} is used instead
	 */
	public NumberExtractor(String decimalSymbol) {
		this.decimalSymbol = (decimalSymbol == null) ? decimalSymbolFallback : decimalSymbol;
	}

	/**
	 * Convert radix point to ".",
	 * and remove thousands separator,
	 * in preparation for our regex.
	 *
	 * @param string the raw text
	 * @return the text with the grouping separator removed and the decimal
	 *         symbol replaced by "."
	 */
	private String prepare(String string) {

		// Remove the separator as literal text. Treating it as a regular expression
		// (String.replaceAll) would make a "." separator match, and so delete, every
		// character, and would throw on separators such as "(" or "*".
		String withoutGrouping = string.replace(groupingSeparator, "");

		if (decimalSymbol.equals(".")) {
			// already in the form the regex expects
			return withoutGrouping;
		}
		return withoutGrouping.replace(decimalSymbol, ".");
	}

	/**
	 * Return the first number found in the string.
	 *
	 * The match is made against the prepared text (grouping separator removed,
	 * decimal symbol converted to "."), so the returned number always uses "."
	 * as its decimal point. If the string contains no number, the original
	 * string is returned unchanged.
	 *
	 * The {@code throws} clause is kept so the signature stays as it was; this
	 * implementation does not throw {@code IllegalStateException} itself.
	 *
	 * @param string the text to search; must not be {@code null}
	 * @return the first matching number, or {@code string} itself if there is no match
	 */
	public String extractNumber(String string) throws java.lang.IllegalStateException {

		Matcher matcher = pattern.matcher(prepare(string));

		// No number: hand back the input untouched. This is what Word does.
		return matcher.find() ? matcher.group() : string;
	}

	/**
	 * Simple manual check: prints the result of extracting from a string
	 * which contains no number.
	 *
	 * @param args not used
	 */
	public static void main(String[] args) {

		NumberExtractor nex = new NumberExtractor(".");
		System.out.println(nex.extractNumber("€ HEH EUR"));
	}
}