package de.skuzzle.polly.core.parser; import java.nio.charset.Charset; import java.util.Calendar; import java.util.HashMap; import java.util.Map; import java.util.Set; import java.util.TreeSet; import de.skuzzle.polly.core.parser.problems.Problems; /* * This class is subject to ISSUE: 0000018 * Need to proof working with different encodings. */ /** * Extends an {@link AbstractTokenStream} to read actual {@link Token}s from an input * String. The following identifiers are read as keywords: * <pre> * true * false * now * </pre> * * <p>Identifiers may start with a _ or a letter and can then contain any letter, * number or further _.</p> * * <p>Numbers read may have a decimal part and optionally a mantiss part * (scientific notation). More formally numbers may have the format * {@code \d+(\.\d+)?([+-]?[eE]\d+)?}</p> * * <p>There are a few different kinds of date-tokens which after they are read, all * contain a {@link Date} value. A date may be a pure time definition like * <code>\d{1,2}:\d{1,2}</code> where the first number must lie in the interval * {@code [0;23]} and the second number in {@code [0;59]}.</p> * * <p>Further a date-token can be a pure date like <code>\d{1,2}.\d{1,2}.\d{4}</code> * where the three parts must be valid for date definitions.</p> * * <p>And a date can be specified by an amount of time from now. Like {@code 1d4h10m} * specifies a date with the value one day, four hours and 10 minutes from now. * Valid characters for such a date definition are:</p> * <pre> * y year interpreted as 365d * w week interpreted as 7d * d day interpreted as 24h * h hour interpreted as 60m * m minute interpreted as 60s * s second interpreted as 1000ms * </pre> * * <p>Each of this characters may only occur once within one of such date definition but * must not be ordered.</p> * * @author Simon * */ public class InputScanner extends AbstractTokenStream { /** * The maximum radix value for radix'ed integers. Higher values will cause a * {@link ParseException} to be thrown when hitting on. * * Note: The minimum value is always 2 (by nature) */ public final static int MAX_RADIX = Character.MAX_RADIX; protected Map<String, TokenType> keywords; private boolean skipWhiteSpaces; public InputScanner(String stream) { super(stream); this.prepareKeywords(); } public InputScanner(String stream, Charset charset) { super(stream, charset); this.prepareKeywords(); } protected void prepareKeywords() { this.keywords = new HashMap<String, TokenType>(); this.keywords.put("xor", TokenType.XOR); //$NON-NLS-1$ this.keywords.put("true", TokenType.TRUE); //$NON-NLS-1$ this.keywords.put("false", TokenType.FALSE); //$NON-NLS-1$ this.keywords.put("now", TokenType.DATETIME); //$NON-NLS-1$ this.keywords.put("polly", TokenType.POLLY); //$NON-NLS-1$ this.keywords.put("public", TokenType.PUBLIC); //$NON-NLS-1$ this.keywords.put("temp", TokenType.TEMP); //$NON-NLS-1$ this.keywords.put("help", TokenType.QUESTION); //$NON-NLS-1$ this.keywords.put("if", TokenType.IF); //$NON-NLS-1$ this.keywords.put("del", TokenType.DELETE); //$NON-NLS-1$ this.keywords.put("inspect", TokenType.INSPECT); //$NON-NLS-1$ this.keywords.put("list", TokenType.LIST); //$NON-NLS-1$ this.keywords.put("delay", TokenType.DELAY); //$NON-NLS-1$ this.keywords.put("reinterpret", TokenType.REINTERPRET); //$NON-NLS-1$ /* To avoid 1char identifiers "_" */ this.keywords.put("_", TokenType.UNKNOWN); //$NON-NLS-1$ } /** * Sets whether whitespaces are currently being skipped. * * @param value Whether whitespaces should be skipped. */ public void setSkipWhiteSpaces(boolean value) { this.skipWhiteSpaces = value; } /** * Gets whether whitespaces are currently being skipped. * * @return Whether whitespaces are currently being skipped. */ public boolean skipWhiteSpaces() { return this.skipWhiteSpaces; } @Override public boolean match(TokenType type) throws ParseException { if (type == TokenType.SEPERATOR && this.skipWhiteSpaces) { throw new IllegalArgumentException( "can not match token type SEPARATOR while 'skipWhiteSpaces' is enabled"); } return super.match(type); } @Override protected synchronized Token readToken() throws ParseException { final Token next = this.readTokenInternal(); if (ParserProperties.should(ParserProperties.ENABLE_SCANNER_DEBUGGING)) { System.out.println(next.toString()); } return next; } protected final Token readTokenInternal() throws ParseException { int state = 0; int tokenStart = this.getStreamIndex(); StringBuilder currentString = new StringBuilder(); while (!this.eos()) { if (state == 0) { int next = this.readChar(); if (next == -1) { final Position pos = new Position(tokenStart, this.getStreamIndex() + 1); return new Token(TokenType.EOS, pos); } if (Character.isWhitespace(next)) { if (!this.skipWhiteSpaces) { this.pushBack(next); state = 1; } else { // skipping whitespaces, so move token start ++tokenStart; } } else if (InputScanner.isIdentifierStart(next)) { this.pushBack(next); return this.readIdentifier(); } else if (next == '0') { /* * '0' cannot start a number, but starts the 0x: Operator */ return this.readRadixOperator(); } else if (Character.isDigit(next)) { this.pushBack(next); return this.readNumber(); } else if (next == '"') { this.pushBack(next); return this.readString(); } else if (next == '#') { return this.readChannel(); } else if (next == '+') { state = 9; } else if (next == '-') { state = 10; } else if (next == '.') { state = 8; } else if (next == '@') { return this.readUser(); } else if (next == ',') { return new Token(TokenType.COMMA, this.spanFrom(tokenStart), ","); //$NON-NLS-1$ } else if (next == '*') { return new Token(TokenType.MUL, this.spanFrom(tokenStart), "*"); //$NON-NLS-1$ } else if (next == '/') { state = 15; } else if (next == '\\') { state = 13; } else if (next == '%') { return new Token(TokenType.MOD, this.spanFrom(tokenStart), "%"); //$NON-NLS-1$ } else if (next == '$') { return new Token(TokenType.DOLLAR, this.spanFrom(tokenStart),"$"); //$NON-NLS-1$ } else if (next == '^') { state = 14; } else if (next == '!') { state = 2; } else if (next == '(') { return new Token(TokenType.OPENBR, this.spanFrom(tokenStart), "("); //$NON-NLS-1$ } else if (next == ')') { return new Token(TokenType.CLOSEDBR, this.spanFrom(tokenStart), ")"); //$NON-NLS-1$ } else if (next == '[') { return new Token(TokenType.OPENSQBR, this.spanFrom(tokenStart), "["); //$NON-NLS-1$ } else if (next == ']') { return new Token(TokenType.CLOSEDSQBR, this.spanFrom(tokenStart), "]"); //$NON-NLS-1$ } else if (next == '{') { return new Token(TokenType.OPENCURLBR, this.spanFrom(tokenStart), "{"); //$NON-NLS-1$ } else if (next == '}') { return new Token(TokenType.CLOSEDCURLBR, this.spanFrom(tokenStart), "}"); //$NON-NLS-1$ } else if (next == '?') { state = 12; } else if (next == '~') { return new Token(TokenType.WAVE, this.spanFrom(tokenStart), "~"); //$NON-NLS-1$ } else if (next == ';') { return new Token(TokenType.SEMICOLON, this.spanFrom(tokenStart), ";"); //$NON-NLS-1$ } else if (next == '&') { state = 7; } else if (next == '|') { state = 6; } else if (next == ':') { return new Token(TokenType.COLON, this.spanFrom(tokenStart), ":"); //$NON-NLS-1$ } else if (next == '=') { state = 5; } else if (next == '<') { state = 3; } else if (next == '>') { state = 4; } else { return this.parseException( Problems.format(Problems.ILLEGAL_SYMBOL, (char) next), tokenStart); } } else if (state == 1) { int next = this.readChar(); if (!Character.isWhitespace(next)) { this.pushBack(next); return new Token(TokenType.SEPERATOR, this.spanFrom(tokenStart), currentString.toString()); } else { currentString.appendCodePoint(next); } } else if (state == 2) { int next = this.readChar(); if (next == '=') { return new Token(TokenType.NEQ, this.spanFrom(tokenStart), "!="); //$NON-NLS-1$ } else { this.pushBack(next); return new Token( TokenType.EXCLAMATION, this.spanFrom(tokenStart), "!"); //$NON-NLS-1$ } } else if (state == 3) { int next = this.readChar(); if (next == '=') { state = 16; } else if (next == '<') { return new Token( TokenType.LEFT_SHIFT, this.spanFrom(tokenStart), "<<"); //$NON-NLS-1$ } else { this.pushBack(next); return new Token(TokenType.LT, this.spanFrom(tokenStart), "<"); //$NON-NLS-1$ } } else if (state == 4) { int next = this.readChar(); if (next == '=') { return new Token(TokenType.EGT, this.spanFrom(tokenStart), ">="); //$NON-NLS-1$ } else { this.pushBack(next); return new Token(TokenType.GT, this.spanFrom(tokenStart), ">"); //$NON-NLS-1$ } } else if (state == 5) { int next = this.readChar(); if (next == '=') { return new Token(TokenType.EQ, this.spanFrom(tokenStart), "=="); } else if (next == '>') { return new Token(TokenType.IMPLICATION, this.spanFrom(tokenStart), "=>"); } else { return this.parseException( Problems.format(Problems.ILLEGAL_SYMBOL, (char) next), tokenStart, next); } } else if (state == 6) { int next = this.readChar(); if (next == '|') { return new Token(TokenType.BOOLEAN_OR, this.spanFrom(tokenStart), "||"); //$NON-NLS-1$ } else { this.pushBack(next); return new Token(TokenType.INT_OR, this.spanFrom(tokenStart), "|"); //$NON-NLS-1$ } } else if (state == 7) { int next = this.readChar(); if (next == '&') { return new Token(TokenType.BOOLEAN_AND, this.spanFrom(tokenStart), "&&"); //$NON-NLS-1$ } else if (next == '|') { return new Token(TokenType.AND_OR, this.spanFrom(tokenStart), "&|"); //$NON-NLS-1$ } else { this.pushBack(next); return new Token(TokenType.INT_AND, this.spanFrom(tokenStart), "&"); //$NON-NLS-1$ } } else if (state == 8) { int next = this.readChar(); if (next == '.') { return new Token(TokenType.DOTDOT, this.spanFrom(tokenStart), ".."); //$NON-NLS-1$ } else if (Character.isDigit(next)) { this.pushBackArtificial('0'); this.pushBack('.'); this.pushBack(next); return this.readNumber(); } else { this.pushBack(next); return new Token(TokenType.DOT, this.spanFrom(tokenStart), "."); //$NON-NLS-1$ } } else if (state == 9) { int next = this.readChar(); if (next == '~') { return new Token(TokenType.ADDWAVE, this.spanFrom(tokenStart), "+~"); //$NON-NLS-1$ } else if (next == '=') { return new Token(TokenType.ADDEQUALS, this.spanFrom(tokenStart), "+="); //$NON-NLS-1$ } else { this.pushBack(next); return new Token(TokenType.ADD, this.spanFrom(tokenStart), "+"); //$NON-NLS-1$ } } else if (state == 10) { int next = this.readChar(); if (next == '>') { return new Token(TokenType.ASSIGNMENT, this.spanFrom(tokenStart), "->"); //$NON-NLS-1$ } else if (next == '=') { return new Token(TokenType.SUBEQUALS, this.spanFrom(tokenStart), "-="); //$NON-NLS-1$ } else { this.pushBack(next); return new Token(TokenType.SUB, this.spanFrom(tokenStart), "-"); //$NON-NLS-1$ } } else if (state == 12) { int next = this.readChar(); if (next == '!') { return new Token(TokenType.QUEST_EXCALAMTION, this.spanFrom(tokenStart), "?!"); //$NON-NLS-1$ } else { this.pushBack(next); return new Token(TokenType.QUESTION, this.spanFrom(tokenStart), "?"); //$NON-NLS-1$ } } else if (state == 13) { int next = this.readChar(); if (next == '(') { return new Token(TokenType.LAMBDA, this.spanFrom(tokenStart), "\\("); //$NON-NLS-1$ } else { this.pushBack(next); final Token escaped = this.readToken(); return new EscapedToken(this.spanFrom(tokenStart), escaped); } } else if (state == 14) { int next = this.readChar(); if (next == '^') { return new Token(TokenType.XOR, this.spanFrom(tokenStart), "^^"); //$NON-NLS-1$ } else if (next == 'T') { return new Token(TokenType.TRANSPOSE, this.spanFrom(tokenStart), "^T"); //$NON-NLS-1$ } else { this.pushBack(next); return new Token(TokenType.POWER, this.spanFrom(tokenStart), "^"); //$NON-NLS-1$ } } else if (state == 15) { int next = this.readChar(); if (next == '/') { return new Token(TokenType.INTDIV, this.spanFrom(tokenStart), "//"); //$NON-NLS-1$ } else { this.pushBack(next); return new Token(TokenType.DIV, this.spanFrom(tokenStart), "/"); //$NON-NLS-1$ } } else if (state == 16) { int next = this.readChar(); if (next == '>') { return new Token(TokenType.EQUIVALENCE, this.spanFrom(tokenStart), "<=>"); } else { this.pushBack(next); return new Token(TokenType.ELT, this.spanFrom(tokenStart), "<="); } } else { throw new IllegalStateException("unhandled state: " + state); //$NON-NLS-1$ } } return new Token(TokenType.EOS, new Position(tokenStart - 1, tokenStart)); } /** * Reads the 'radix' operator which changes the representation of a number into * a number system with the given radix. * * @return A {@link Token} which contains the radix in {@link Token#getLongValue()}. * @throws ParseException If a lexical error appears. */ private Token readRadixOperator() throws ParseException { int tokenStart = this.getStreamIndex() - 1; // include the skipped '0' int state = 0; int radix = 0; while (!this.eos()) { if (state == 0) { int next = this.readChar(); if (next != 'x') { this.pushBack('0'); this.pushBack(next); return this.readNumber(); } else { state = 1; } } else if (state == 1) { int next = this.readChar(); if (Character.isDigit(next)) { this.pushBack(next); state = 2; } else { return this.parseException( Problems.format(Problems.MISSING_RADIX), tokenStart, next); } } else if (state == 2) { int next = this.readChar(); if (Character.isDigit(next)) { radix = radix * 10 + Character.digit(next, 10); } else if (next == ':') { if (radix > Character.MAX_RADIX) { return this.parseException( Problems.format(Problems.HIGH_RADIX, radix, Character.MAX_RADIX), tokenStart); } return new Token(TokenType.RADIX, this.spanFrom(tokenStart), radix); } } } return this.parseException(Problems.format(Problems.INVALID_0X), tokenStart); } /** * Reads a String-literal. A String-literal starts with a " and ends at the next * ". In between there may occur any other char. * * @return A new String Token. * @throws ParseException If no closing quotes could be found. */ private Token readString() throws ParseException { int tokenStart = this.getStreamIndex(); StringBuilder lexem = new StringBuilder(); int state = 0; Token escapeError = null; while (!this.eos()) { if (state == 0) { int next = this.readChar(); if (next == '"') { //lexem.append(next); //do not append quotes to string literal state = 1; } else { return this.parseException(Problems.format(Problems.INVALID_STRING), tokenStart, next); } } else if (state == 1) { int start = this.getStreamIndex(); int next = this.readChar(); if (next == '\\') { if (!this.readEscapeSequence(lexem)) { next = this.readChar(); escapeError = this.parseException( Problems.format(Problems.INVALID_ESCAPE, (char) next), start); } } else if (next == '"') { //lexem.append(next); //see above if (escapeError != null) { return escapeError; } return new Token(TokenType.STRING, this.spanFrom(tokenStart), lexem.toString()); } else if (next == -1) { // HACK: to avoid errors if closing quotes are missing // This is subject to ISSUE: 0000022 break; } else { lexem.appendCodePoint(next); } } } this.pushBack(-1); return this.parseException(Problems.format(Problems.UNCLOSED_STRING), tokenStart); } private boolean readEscapeSequence(StringBuilder lexem) throws ParseException { // -1 to include the '\' which was consumed by #readString() while (!this.eos()) { int next = this.readChar(); if (next == '"') { lexem.append('"'); return true; } else if (next == 'n') { lexem.append(System.lineSeparator()); return true; } else if (next == '\\') { lexem.append('\\'); return true; } else { this.pushBack(next); return false; } } return false; } private Token readChannel() throws ParseException { int tokenStart = this.getStreamIndex(); StringBuilder lexem = new StringBuilder(); int state = 0; while (!this.eos()) { int next = this.readChar(); switch (state) { case 0: if (InputScanner.isIdentifierPart(next) || next == '-') { this.pushBack(next); state = 1; } else { return this.parseException(Problems.format(Problems.INVALID_CHANNEL, lexem), tokenStart); } break; case 1: if (InputScanner.isIdentifierPart(next) || next == '-') { lexem.appendCodePoint(next); } else { this.pushBack(next); return new Token(TokenType.CHANNEL, this.spanFrom(tokenStart), "#" + lexem.toString()); //$NON-NLS-1$ } } } return this.parseException(Problems.format(Problems.INVALID_CHANNEL, lexem), tokenStart); } private Token readUser() throws ParseException { int tokenStart = this.getStreamIndex() - 1; // include @ sign int state = 0; StringBuilder lexem = new StringBuilder(); // ISSUE: 0000031 // Userliterals can contain "-" which interferes with a following assignment // operator. This is now fixed by a special treatment with a lookahead to check // if an assignment operator is following. while (!this.eos()) { if (state == 0) { int next = this.readChar(); if (InputScanner.isIdentifierPart(next) || next == '[' || next == ']' || next == ':') { lexem.appendCodePoint(next); } else if (next == '-') { state = 1; } else { this.pushBack(next); return new Token(TokenType.USER, this.spanFrom(tokenStart), lexem.toString()); } } else if (state == 1) { int next = this.readChar(); if (next == '>') { this.pushBack('-'); this.pushBack('>'); return new Token(TokenType.USER, this.spanFrom(tokenStart), lexem.toString()); } else { lexem.append("-"); //$NON-NLS-1$ this.pushBack(next); state = 0; } } } return this.parseException(Problems.format(Problems.INVALID_USER, lexem), tokenStart); } /** * Reads an Identifier from the stream. Identifier can start with a letter or a * underscore followed by any letters, numbers or underscores. * If the identifier ends with a colon (':'), a user literal is returned. Before * an identifier Token is returned, it is checked by * {@link #identifierToToken(String, int)} whether it is a reserved keyword. * * @return An identifier Token or a User Token. * @throws ParseException If any invalid character occurs. */ private Token readIdentifier() throws ParseException { int tokenStart = this.getStreamIndex(); int state = 0; StringBuilder lexem = new StringBuilder(); // ISSUE: 0000027 // Fixed by adding state 0, which expects an identifier-start-character and then // switches to state 1 which expects identifier-part-characters. while (!this.eos()) { if (state == 0) { int next = this.readChar(); if (InputScanner.isIdentifierStart(next)) { state = 1; lexem.appendCodePoint(next); } else { return this.parseException(Problems.format(Problems.INVALID_IDENTIFIER), tokenStart); } } else if (state == 1) { int next = this.readChar(); if (InputScanner.isIdentifierPart(next)) { lexem.appendCodePoint(next); /*} else if (next == ':') { //lexem.append(next); // do not append ':' to username return new Token(TokenType.USER, this.spanFrom(tokenStart), lexem.toString());*/ } else { this.pushBack(next); return this.identifierToToken(lexem.toString(), tokenStart); } } } return this.parseException(Problems.format(Problems.INVALID_IDENTIFIER2, lexem), tokenStart); } /** * Converts an identifier to a keyword token if it represents any. Otherwise, an * identifier is returned. * * @param string The String, representing the identifier. * @param tokenStart The stream index where this token begins. * @return An Identifier Token or a reserved keyword token. * @throws ParseException If {@code string} is an invalid identifier. */ private Token identifierToToken(String string, int tokenStart) throws ParseException { TokenType lookup = this.keywords.get(string); if (lookup == null) { return new Token(TokenType.IDENTIFIER, this.spanFrom(tokenStart), string); } else if (lookup == TokenType.UNKNOWN) { return this.parseException( Problems.format(Problems.INVALID_IDENTIFIER2, string), tokenStart); } else { return new Token(lookup, this.spanFrom(tokenStart)); } } /* * States for this method: * 0: entry state * 1: read at least one number and a dot * 3: read at least one number and a dot and know that there is at least one number * to come. * 4. read a float or int literal followed by a '�' * 5: read the beginning of a date: a number, a dot, a number, a dot * 6: */ /** * Main function to read all kinds of literals which start with numbers. * * It calls {@link #readTimeSpan(int, int)} if it assumes that this is a timespan, * or {@link #readTime(int, int, boolean)} if it assumes that this is a time. If it * is a normal number (int or float), this method reads it to the end. * * @return The read token. * @throws ParseException If the read characters form no valid Number or * DateTime-Token. */ private Token readNumber() throws ParseException { int tokenStart = this.getStreamIndex(); int state = 0; int firstPart = 0; // first part of a time or a date. Also used as radix when // reading a radixed integer literal int secondPart = 0; // second part of a date (months) int thirdPart = 0; // year-part of a date int tmp = 0; // first part of a time, if read after a date Token timeToken = null;//new Token(TokenType.DATETIME, this.spanFrom(0), new Date()); double dec = 1; double value = 0.0; double exp = 0.0; double exp_sign = 1.0; while (!this.eos()) { if (state == 0) { int next = this.readChar(); if (Character.isDigit(next)) { value = value * 10 + Character.digit(next, 10); firstPart = firstPart * 10 + Character.digit(next, 10); } else if (next == '#') { return this.readRadixedInteger(tokenStart, firstPart); } else if (InputScanner.isTimeLiteralChar(next)) { this.pushBack(next); return this.readTimeSpan(firstPart, tokenStart); } else if (next == ':') { return this.readTime(firstPart, tokenStart, true); } else if (next == '.') { state = 1; } else if (next == '°') { // degree character state = 4; } else if (next == 'E' || next == 'e') { state = 9; } else { this.pushBack(next); return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), value); } } else if (state == 1) { int next = this.readChar(); if (Character.isDigit(next)) { this.pushBack(next); state = 3; } else if (next == '.') { this.pushBack('.'); this.pushBack('.'); return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), value); } else { return this.parseException(Problems.format(Problems.MISSING_DECIMALS), tokenStart, next); } } else if (state == 3) { int next = this.readChar(); if (Character.isDigit(next)) { dec *= 0.1; value += (double) Character.digit(next, 10) * dec; secondPart = secondPart * 10 + Character.digit(next, 10); } else if (next == '.') { /* Till now we read the beginning of a date literal missing the * year: xx.xx. * or this might be a decimal number followed by a dotdot operator * if the next char is a '.' * In the latter case, we return the so far read number and pushback * two dots. */ state = 5; } else if (next == 'E' || next == 'e') { state = 9; } else if (next == '°') { //degree character state = 4; } else { this.pushBack(next); return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), value); } } else if (state == 4) { return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), Math.toRadians(value)); } else if (state == 5) { int next = this.readChar(); /* This is no Date Literal, but a decimal number followed by a * dotdot operator. So pushback the two dots and return the number. */ if (next == '.') { this.pushBack('.'); this.pushBack('.'); return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), value); } // HACK: Need to ensure that at least on number has been read before // reading on. if (firstPart > 31 || secondPart > 12) { return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), tokenStart); } if (Character.isDigit(next)) { thirdPart = thirdPart * 10 + Character.digit(next, 10); } else if (next == '@') { state = 6; } else { this.pushBack(next); Calendar c = Calendar.getInstance(); c.set(Calendar.DAY_OF_MONTH, firstPart); c.set(Calendar.MONTH, secondPart); c.set(Calendar.YEAR, thirdPart); timeToken = new Token(TokenType.DATETIME, this.spanFrom(tokenStart), c.getTime()); state = 8; } } else if (state == 6) { int next = this.readChar(); if (Character.isDigit(next)) { this.pushBack(next); state = 7; } else { return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), tokenStart, next); } } else if (state == 7) { int next = this.readChar(); if (Character.isDigit(next)) { tmp = tmp * 10 + Character.digit(next, 10); } else if (next == ':') { timeToken = this.readTime(tmp, tokenStart, false); if (timeToken.matches(TokenType.ERROR)) { return timeToken; } state = 8; } else { return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), tokenStart, next); } } else if (state == 8) { Calendar c = Calendar.getInstance(); if (thirdPart < 100) { final int year = c.get(Calendar.YEAR); final int millenium = year - year % 1000; thirdPart += millenium; } if (thirdPart > 9999) { return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), tokenStart); } // CONSIDER ISSUE 0000115 c.setTime(timeToken.getDateValue()); c.set(thirdPart, secondPart - 1, firstPart); return new Token(TokenType.DATETIME, this.spanFrom(tokenStart), c.getTime()); } else if (state == 9) { int next = this.readChar(); if (Character.isDigit(next)) { this.pushBack(next); state = 10; } else if (next == '-') { exp_sign = -1.0; state = 10; } else if (next == '+') { exp_sign = 1.0; state = 10; } else { return this.parseException(Problems.format(Problems.INVALID_NUMBER), tokenStart, next); } } else if (state == 10) { int next = this.readChar(); if (Character.isDigit(next)) { exp = exp * 10 + Character.digit(next, 10); } else { this.pushBack(next); // HACK: Ensure that at least one number has been read if (exp == 0.0) { return this.parseException( Problems.format(Problems.INVALID_NUMBER), tokenStart); } value = value * Math.pow(10, exp * exp_sign); return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), value); } } // state } // while throw new IllegalStateException("should not be reachable"); //$NON-NLS-1$ } private Token readTimeSpan(int value, int tokenStart) throws ParseException { Set<Integer> odd = new TreeSet<Integer>(); int state = 0; int tmp = value; value = 0; while (!this.eos()) { if (state == 0) { int next = this.readChar(); if (InputScanner.isTimeLiteralChar(next)) { if (odd.contains(next)) { return this.parseException( Problems.format(Problems.INVALID_DATE_TIME), tokenStart); } odd.add(next); value += tmp * InputScanner.timeLiteralValue(next); tmp = 0; } else if (Character.isDigit(next)) { this.pushBack(next); state = 1; } else { this.pushBack(next); /*Calendar c = Calendar.getInstance(); c.add(Calendar.SECOND, (int) value); return new Token(TokenType.DATETIME, this.spanFrom(tokenStart), c.getTime());*/ return new Token(TokenType.TIMESPAN, this.spanFrom(tokenStart), value); } } else if (state == 1) { int next = this.readChar(); if (InputScanner.isTimeLiteralChar(next)) { this.pushBack(next); state = 0; } else if (Character.isDigit(next)) { tmp = tmp * 10 + Character.digit(next, 10); } } } return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), tokenStart); } /** * <p>Reads the end of a time specification from a String. 'End' means, that the * first part (the hour part) must already have been read.</p> * * <p>To determine if this is really a time rather than a normal number, the method * {@link #readNumber()} consumes numbers until it encounters a {@code colon}. * It then passes the so far read numbers to this method to read the rest of the * time. If the next char encountered is no number and {@code exceptNumber} is * set to {@code true}, this method will return a Number-Token, representing the * so far read characters. If set to {@code false}, this method will throw a * {@link ParseException}.</p> * * @param firstPart The so far read part of the time (the hour-part). * @param tokenStart The beginning index of the currently read token. * @param exceptNumber Determines if this method breaks up if it is not a completely * valid time, or if it will return a Number-Token instead (see description * above!). * @return Most likely a DateTime-Token. * @throws ParseException If the read characters form no valid DateTime-Token. */ private Token readTime(int firstPart, int tokenStart, boolean exceptNumber) throws ParseException { int state = 0; int secondPart = 0; if (firstPart > 23) { return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), tokenStart); } while (!this.eos()) { if (state == 0) { int next = this.readChar(); if (Character.isDigit(next)) { state = 1; secondPart = secondPart * 10 + Character.digit(next, 10); } else if (exceptNumber){ this.pushBack(':'); return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), (double) firstPart); } else { return this.parseException( Problems.format(Problems.INVALID_DATE_TIME), tokenStart, next); } } else if (state == 1) { int next = this.readChar(); if (!Character.isDigit(next)) { pushBack(next); } else { secondPart = secondPart * 10 + Character.digit(next, 10); } if (secondPart > 59) { return this.parseException( Problems.format(Problems.INVALID_DATE_TIME), tokenStart); } Calendar c = Calendar.getInstance(); c.set(Calendar.HOUR_OF_DAY, firstPart); c.set(Calendar.MINUTE, secondPart); c.set(Calendar.SECOND, 0); return new Token(TokenType.DATETIME, this.spanFrom(tokenStart), c.getTime()); } } return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), tokenStart); } private Token readRadixedInteger(int tokenStart, int radix) throws ParseException { int value = 0; int state = 0; if (radix > Character.MAX_RADIX) { return this.parseException( Problems.format(Problems.HIGH_RADIX, radix, Character.MAX_RADIX), tokenStart); } while (!this.eos()) { if (state == 0) { int next = this.readChar(); if (InputScanner.isDigit(next, radix)) { this.pushBack(next); state = 1; } else { return this.parseException( Problems.format(Problems.INVALID_RADIXED_INT), tokenStart); } } else if (state == 1) { int next = this.readChar(); if (InputScanner.isDigit(next, radix)) { value = value * radix + Character.digit(next, radix); } else { this.pushBack(next); return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), (double) value); } } } // not reachable return this.parseException(Problems.format(Problems.INVALID_RADIXED_INT), tokenStart); } /** * Reports a lexical error to the {@link ProblemReporter} of this scanner. * * @param errorMessage The parse error message. * @param tokenStart The beginning of the errornous token. * @return A token with type {@link TokenType#ERROR} * @throws ParseException If the {@link ProblemReporter} only supports one * Problem. */ protected Token parseException(String errorMessage, int tokenStart) throws ParseException { final Position pos = this.spanFrom(tokenStart); return new Token(TokenType.ERROR, pos, errorMessage); } protected Token parseException(String errorMessage, int tokenStart, int next) throws ParseException { this.pushBack(next); final Position pos = this.spanFrom(tokenStart); this.readChar(); return new Token(TokenType.ERROR, pos, errorMessage); } /** * Determines whether the char c is a valid symbol for a number literal with * the given radix. E.g. for radix = 16, this method would return <code>true</code> * if c was eiter of <code>0123456789ABCDEFabcdef</code>. * * @param c The character to test. * @param radix The radix. * @return <code>true</code> iff the char is a valid symbol for the given radix. */ protected static boolean isDigit(int c, int radix) { return Character.digit(c, radix) != -1; } /** * Determines whether the given codepoint os a valid part of a polly identifier. * * @param token The character to check. * @return <code>true</code> if it is a valid identifier part. */ protected static boolean isIdentifierPart(int token) { return Character.isJavaIdentifierPart(token) && token != '$'; } /** * Determines whether the given codepoint is a valid char to start a polly * identifier. * * @param token The character to check. * @return <code>true</code> if the character can start a polly identifier. */ protected static boolean isIdentifierStart(int token) { return Character.isJavaIdentifierStart(token) && token != '$'; } // Fixed ISSUE: 0000010: Added characters 'w' and 'y' for week and year. /** * Determines if a character is a valid time modifier. That is if it is any of the * following: {@code 'h' | 'm' | 's' | 'd' | 'w' | 'y'}. * @param token The lexical token to check. * @return Whether the token is a time modifier. */ protected static boolean isTimeLiteralChar(int token) { return token == 'h' || token == 'm' || token == 's' || token == 'd' || token == 'w' || token == 'y'; } /** * Gets the value for a time modifier. That is for * {@code 's' = 1, 'm' = 60, 'h' = 3600, 'd' = 86400, 'w' = 604800, 'y' = 31536000}. * * @param token TimeModifier character. * @return The value the character represents in terms of a TimeModifier token. * @throws IllegalArgumentException If no valid time modifier char as determined by * {@link #isTimeLiteralChar(int)} is entered. */ protected static int timeLiteralValue(int token) { switch (token) { case 's': return 1; case 'm': return 60; case 'h': return 3600; case 'd': return 86400; case 'w': return 604800; case 'y': return 31536000; default: throw new IllegalArgumentException("No valid time modifier char."); //$NON-NLS-1$ } } }