/* * JaamSim Discrete Event Simulation * Copyright (C) 2014 Ausenco Engineering Canada Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.jaamsim.input; import java.util.ArrayList; public class ExpTokenizer { public static final int VAR_TYPE = 0; public static final int NUM_TYPE = 1; public static final int SYM_TYPE = 2; public static final int SQ_TYPE = 3; // Square quoted tokens public static final int DSQ_TYPE = 4; // Double Square quoted tokens public static class Token { public int type; public String value; public int pos; } public static boolean isWhiteSpace(char c) { if (Character.isWhitespace(c)) return true; return false; } // Use java style identifiers for now public static boolean isVarStartChar(char c) { if (Character.isJavaIdentifierStart(c)) return true; return false; } private static boolean isVarMemberChar(char c) { if (Character.isJavaIdentifierPart(c)) return true; return false; } private static boolean isNumMemberChar(char c) { if (Character.isDigit(c)) return true; if (c=='.') return true; return false; } // List of 'long' symbols to check for, in order private static ArrayList<String> longSymbols = new ArrayList<>(); static { longSymbols.add("=="); longSymbols.add("!="); longSymbols.add("<="); longSymbols.add(">="); longSymbols.add("&&"); longSymbols.add("||"); } public static ArrayList<Token> tokenize(String input) throws ExpError { int pos = 0; ArrayList<Token> res = new ArrayList<>(); while (pos < input.length()) { char c = input.charAt(pos); if (isWhiteSpace(c)) { pos++; continue; } if (c == '[') { // This is the beginning of a square quoted string pos = getSQToken(res, pos, input); continue; } if (isVarStartChar(c)) { pos = getVarToken(res, pos, input); continue; } if (Character.isDigit(c)){ pos = getNumToken(res, pos, input); continue; } pos = getSymbolToken(res, pos, input); } return res; } private static int getVarToken(ArrayList<Token> res, int startPos, String input) { Token newTok = new Token(); newTok.type = VAR_TYPE; newTok.pos = startPos; int pos = startPos; StringBuilder sb = new StringBuilder(); while (pos < input.length()) { char next = input.charAt(pos); if (!isVarMemberChar(next)) { break; } sb.append(next); ++pos; } newTok.value = sb.toString(); res.add(newTok); return pos; } private static int getSQToken(ArrayList<Token> res, int startPos, String input) throws ExpError { boolean isDoubleQuoted = false; int closePos = startPos + 1; if (input.length() > startPos+1 && input.charAt(startPos+1) == '[') { // This is double square quoted token isDoubleQuoted = true; closePos += 1; } while (closePos < input.length()) { char c = input.charAt(closePos); if (c == '[') throw new ExpError(input, closePos, "Nested square quotes"); if (c == ']') break; closePos++; } if (closePos == input.length()) { throw new ExpError(input, startPos, "No closing square brace for brace"); } if (isDoubleQuoted) { // Check for the second closing brace if ((closePos+1) == input.length() || input.charAt(closePos+1) != ']') { throw new ExpError(input, startPos, "No closing double brace for double square brace string"); } Token newTok = new Token(); newTok.pos = startPos + 1; newTok.type = DSQ_TYPE; newTok.value = input.substring(startPos + 2, closePos); res.add(newTok); return closePos + 2; } Token newTok = new Token(); newTok.pos = startPos; newTok.type = SQ_TYPE; newTok.value = input.substring(startPos + 1, closePos); res.add(newTok); return closePos + 1; } // TODO: Should this include 'f' or 'd' as in the java convention? Also, should we support hex? private static int getNumToken(ArrayList<Token> res, int startPos, String input) throws ExpError { Token newTok = new Token(); newTok.type = NUM_TYPE; newTok.pos = startPos; int pos = startPos; StringBuilder sb = new StringBuilder(); while (pos < input.length()) { char next = input.charAt(pos); if (!isNumMemberChar(next)) { break; } sb.append(next); ++pos; } // Now check for an optional exponent if (pos < input.length() && (input.charAt(pos) == 'e' || input.charAt(pos) == 'E')) { sb.append(input.charAt(pos++)); // Now check for an option - if (pos < input.length() && input.charAt(pos) == '-') { sb.append(input.charAt(pos++)); } // An another digit while (pos < input.length()) { char next = input.charAt(pos); if (!Character.isDigit(next)) { break; } sb.append(next); ++pos; } } newTok.value = sb.toString(); // Check that this string can be parsed to a valid double try { Double.parseDouble(newTok.value); } catch (NumberFormatException ex) { throw new ExpError(input, startPos, "Error parsing number literal: " + newTok.value); } res.add(newTok); return pos; } private static int getSymbolToken(ArrayList<Token> res, int startPos, String input) { // For now, tokens are single character strings that are not numbers, variables or whitespace Token newTok = new Token(); newTok.type = SYM_TYPE; newTok.pos = startPos; for (String s : longSymbols) { if (input.length() - startPos >= s.length() && input.substring(startPos, startPos + s.length()).equals(s)) { // This option matches the current long symbol newTok.value = s; } } if (newTok.value == null) { // Use a simple one character symbol newTok.value = input.substring(startPos, startPos + 1); } res.add(newTok); return startPos + newTok.value.length(); } }