/*******************************************************************************
 * Copyright (c) 2014 Open Door Logistics (www.opendoorlogistics.com)
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser Public License v3
 * which accompanies this distribution, and is available at http://www.gnu.org/licenses/lgpl.txt
 ******************************************************************************/
package com.opendoorlogistics.core.formulae;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.opendoorlogistics.core.utils.strings.Strings;

/**
 * Splits a formula string into a flat list of {@link StringToken}s.
 *
 * <p>A token is, in order of preference, a variable/function name (see {@link #VARIABLE}), a
 * decimal number, an integer, or one of the operator / punctuation / quoted-string patterns
 * listed in {@link #tokensInLine}. Whitespace between tokens is skipped. Matching is
 * case-insensitive.
 */
public final class StringTokeniser {

	/** The characters treated as a minus sign: hyphen-minus, en dash (U+2013) and em dash (U+2014). */
	final public static String[] minuses = new String[] { "-", "\u2013", "\u2014" };

	/** Regex for a decimal number: digits, a dot, then optional further digits (so {@code "5."} matches). */
	final public static String doubleNumber = "(\\d+\\.\\d*)";

	/** Regex for an unsigned integer. */
	final public static String intNumber = "\\d+";

	/**
	 * Regex alternatives for quoted strings, operators and punctuation. They are tried in array
	 * order, so multi-character operators must precede their single-character prefixes
	 * (e.g. {@code &&} before {@code &}).
	 *
	 * <p>The two quoted-string patterns only admit space, tab and printable ASCII inside the
	 * quotes. NOTE(review): that character class contains the single-quote character itself and
	 * the quantifier is greedy, so a single-quoted literal runs to the last {@code '} it can
	 * reach - confirm this is intended.
	 */
	final public static String[] tokensInLine = new String[] {
			"(\"[ \\t#-~!]*\")", "('[ \\t#-~!]*')",
			"(<=)", "&&", "&", "\\|\\|", "\\|", "==", ">=", "!=", "<>", "=", "<", ">",
			"/", "\\++", "\\(", "\\)",
			// En dash and em dash, written as unicode escapes so the pattern does not depend on the
			// encoding the source file is read with; keeps the tokeniser in step with minuses.
			"\u2013", "\u2014",
			"\\-", "%", "\\*", ",", "\\}", "\\{", ":", "#", "\\[", "\\]" };

	/** Matches one token (capture group 1) at the start of the input, after optional whitespace. */
	protected final static Pattern lineTokeniserPattern;

	/** Matches a string made up solely of one or more digits. */
	protected final static Pattern intNumberCheck;

	/**
	 * Regex for a variable or function name: a letter followed by any mix of letters, digits,
	 * dots and underscores. Upper case is accepted because the enclosing pattern is compiled
	 * with {@link Pattern#CASE_INSENSITIVE}.
	 */
	final public static String VARIABLE = "([a-z][abcdefghijklmnopqrstuvwxyz0123456789._]*)";

	static {
		// Join the operator patterns into a single alternation, preserving their order.
		StringBuilder builder = new StringBuilder();
		for (int i = 0; i < tokensInLine.length; i++) {
			if (i > 0) {
				builder.append("|");
			}
			builder.append(tokensInLine[i]);
		}

		lineTokeniserPattern = Pattern.compile("^[\\s]*(" + VARIABLE + "|" + doubleNumber + "|" + intNumber + "|" + builder.toString() + ")[\\s]*?",
				Pattern.CASE_INSENSITIVE);
		intNumberCheck = Pattern.compile("^\\d+$");
	}

	/**
	 * A single token together with its lower-cased form and its index in the token list.
	 */
	public static class StringToken {
		private final String original;
		private final String lowerCase;
		private final int position;

		/**
		 * @param original the token text exactly as it appeared in the formula
		 * @param position the zero-based index of this token within the tokenised list
		 */
		public StringToken(String original, int position) {
			this.original = original;
			// Locale.ROOT keeps the result independent of the JVM's default locale
			// (a Turkish default locale would otherwise map 'I' to a dotless i).
			this.lowerCase = original.toLowerCase(Locale.ROOT);
			this.position = position;
		}

		/** @return the token text exactly as it appeared in the formula */
		public String getOriginal() {
			return original;
		}

		/** @return the token text converted to lower case */
		public String getLowerCase() {
			return lowerCase;
		}

		/** @return the zero-based index of this token within the tokenised list */
		public int getPosition() {
			return position;
		}

		/** @return the lower-case form of the token */
		@Override
		public String toString() {
			return lowerCase;
		}
	}

	/**
	 * Manual smoke test: tokenises two sample formulae and prints the results to standard output.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		String[] samples = new String[] { "1+7+polygon.name*2", "5+7.433*4" };
		for (String sample : samples) {
			List<StringToken> tokens = tokenise(sample);
			System.out.println(sample + " -> " + tokens);
		}
	}

	/**
	 * Breaks a formula into tokens.
	 *
	 * @param s the formula text
	 * @return the tokens in the order they appear; each token's position is its index in the list
	 * @throws RuntimeException if some non-whitespace part of the formula matches no token pattern
	 */
	public static List<StringToken> tokenise(String s) {
		// replace odd quotes with normal quotes.
		s = Strings.standardiseSpeechMarks(s);

		List<StringToken> list = new ArrayList<>();

		// One matcher is advanced through the input with region(); with the default anchoring
		// bounds '^' matches at the region start, so no per-token substring copy is needed.
		Matcher matcher = lineTokeniserPattern.matcher(s);
		int currentIndx = 0;
		while (true) {
			matcher.region(currentIndx, s.length());
			if (!matcher.lookingAt()) {
				break;
			}
			list.add(new StringToken(matcher.group(1), list.size()));
			currentIndx = matcher.end();
		}

		// Anything left over that is not just whitespace could not be recognised.
		String noSpaces = s.substring(currentIndx).trim();
		if (noSpaces.length() > 0) {
			throw new RuntimeException("Could not identify part of formula: " + noSpaces);
		}
		return list;
	}

	/**
	 * @return the array of strings recognised as a minus sign (the same array as {@link #minuses})
	 */
	public static String[] minuses() {
		return minuses;
	}

	/**
	 * Tests whether a token is one of the recognised minus characters.
	 *
	 * @param token the token to test; may be null
	 * @return true if the token equals an entry of {@link #minuses}, false otherwise or if null
	 */
	public static boolean isMinus(String token) {
		if (token == null) {
			return false;
		}
		for (String minus : minuses) {
			if (token.equals(minus)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Tests whether a string consists solely of one or more digits.
	 *
	 * @param s the string to test; may be null
	 * @return true if the whole string is digits, false otherwise or if null
	 */
	public static boolean isIntegerNumber(String s) {
		return s != null && intNumberCheck.matcher(s).matches();
	}
}