InputScanner.java example

Explorer
polly-master
- projects
package de.skuzzle.polly.core.parser;

import java.nio.charset.Charset;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import de.skuzzle.polly.core.parser.problems.Problems;




/*
 * This class is subject to ISSUE: 0000018
 * Need to verify that it works with different encodings.
 */



/**
 * Extends an {@link AbstractTokenStream} to read actual {@link Token}s from an input 
 * String. By default, the following identifiers are read as keywords:
 * <pre>
 * xor
 * true
 * false
 * now
 * polly
 * public
 * temp
 * help
 * if
 * del
 * inspect
 * list
 * delay
 * reinterpret
 * </pre>
 * 
 * <p>Identifiers may start with a _ or a letter and can then contain any letter, 
 * number or further _.</p>
 * 
 * <p>Numbers read may have a decimal part and optionally an exponent part 
 * (scientific notation). More formally numbers may have the format
 * {@code \d+(\.\d+)?([+-]?[eE]\d+)?}</p>
 * 
 * <p>There are a few different kinds of date-tokens which after they are read, all 
 * contain a {@link Date} value. A date may be a pure time definition like 
 * <code>\d{1,2}:\d{1,2}</code> where the first number must lie in the interval 
 * {@code [0;23]} and the second number in {@code [0;59]}.</p>
 * 
 * <p>Further a date-token can be a pure date like <code>\d{1,2}.\d{1,2}.\d{4}</code> 
 * where the three parts must be valid for date definitions.</p>
 * 
 * <p>And a date can be specified by an amount of time from now. Like {@code 1d4h10m} 
 * specifies a date with the value one day, four hours and 10 minutes from now.
 * Valid characters for such a date definition are:</p>
 * <pre>
 * y    year    interpreted as 365d
 * w    week    interpreted as 7d
 * d    day     interpreted as 24h
 * h    hour    interpreted as 60m
 * m    minute  interpreted as 60s
 * s    second  interpreted as 1000ms
 * </pre>
 * 
 * <p>Each of these characters may occur only once within such a date definition, but
 * they need not appear in any particular order.</p>
 * 
 * @author Simon
 *
 */
public class InputScanner extends AbstractTokenStream {
    
    /**
     * The maximum radix accepted for radixed integers; equal to
     * {@link Character#MAX_RADIX}. When the radix operator is read with a larger
     * value, the scanner reports a {@link Problems#HIGH_RADIX} parse problem.
     * 
     * Note: The minimum value is always 2 (by nature)
     */
    public final static int MAX_RADIX = Character.MAX_RADIX;
    
    

    /**
     * Maps reserved identifiers to the token type they are scanned as. Filled by
     * {@link #prepareKeywords()}, which both constructors call.
     */
    protected Map<String, TokenType> keywords;
    /** Whether whitespace is skipped instead of being reported as separator tokens. */
    private boolean skipWhiteSpaces;
    
    
    
    /**
     * Creates a scanner for the given input and registers the reserved keywords
     * via {@link #prepareKeywords()}.
     *
     * @param stream The input which is handed to the super class.
     */
    public InputScanner(String stream) {
        super(stream);
        prepareKeywords();
    }
    
    
    
    /**
     * Creates a scanner for the given input and charset and registers the reserved
     * keywords via {@link #prepareKeywords()}.
     *
     * @param stream The input which is handed to the super class.
     * @param charset The charset which is handed to the super class together with
     *          the input.
     */
    public InputScanner(String stream, Charset charset) {
        super(stream, charset);
        prepareKeywords();
    }
    
    
    
    /**
     * Builds the table of reserved identifiers and stores it in {@link #keywords}.
     * Called by both constructors. The entry {@code "_"} is mapped to
     * {@link TokenType#UNKNOWN} so that a lone underscore is rejected as identifier.
     */
    protected void prepareKeywords() {
        final Map<String, TokenType> reserved = new HashMap<String, TokenType>();

        // literals and operators spelled as words
        reserved.put("true", TokenType.TRUE); //$NON-NLS-1$
        reserved.put("false", TokenType.FALSE); //$NON-NLS-1$
        reserved.put("now", TokenType.DATETIME); //$NON-NLS-1$
        reserved.put("xor", TokenType.XOR); //$NON-NLS-1$
        reserved.put("if", TokenType.IF); //$NON-NLS-1$

        // namespace / declaration related words
        reserved.put("polly", TokenType.POLLY); //$NON-NLS-1$
        reserved.put("public", TokenType.PUBLIC); //$NON-NLS-1$
        reserved.put("temp", TokenType.TEMP); //$NON-NLS-1$

        // command like words
        reserved.put("help", TokenType.QUESTION); //$NON-NLS-1$
        reserved.put("del", TokenType.DELETE); //$NON-NLS-1$
        reserved.put("inspect", TokenType.INSPECT); //$NON-NLS-1$
        reserved.put("list", TokenType.LIST); //$NON-NLS-1$
        reserved.put("delay", TokenType.DELAY); //$NON-NLS-1$
        reserved.put("reinterpret", TokenType.REINTERPRET); //$NON-NLS-1$

        /* To avoid 1char identifiers "_" */
        reserved.put("_", TokenType.UNKNOWN); //$NON-NLS-1$

        this.keywords = reserved;
    }
    
    
    
    /**
     * Switches whitespace skipping on or off. While enabled, whitespace characters
     * are dropped by the scanner instead of being reported as separator tokens.
     * 
     * @param value {@code true} to skip whitespaces, {@code false} to report them.
     */
    public void setSkipWhiteSpaces(boolean value) {
        skipWhiteSpaces = value;
    }
    
    
    
    /**
     * Tells whether this scanner currently drops whitespace characters.
     * 
     * @return {@code true} if whitespaces are currently being skipped.
     */
    public boolean skipWhiteSpaces() {
        return skipWhiteSpaces;
    }

    
    
    /**
     * Delegates to the super implementation, but refuses to match separator tokens
     * while whitespace skipping is enabled, because no separator token is produced
     * in that mode.
     *
     * @param type The token type to match.
     * @return The result of the super implementation.
     * @throws ParseException As declared by the super implementation.
     * @throws IllegalArgumentException If {@code type} is
     *          {@link TokenType#SEPERATOR} while whitespaces are being skipped.
     */
    @Override
    public boolean match(TokenType type) throws ParseException {
        final boolean separatorRequested = type == TokenType.SEPERATOR;
        if (separatorRequested && this.skipWhiteSpaces) {
            throw new IllegalArgumentException(
                    "can not match token type SEPARATOR while 'skipWhiteSpaces' is enabled");
        }
        return super.match(type);
    }
    
    
    
    /**
     * Reads the next token via {@link #readTokenInternal()} and prints it to
     * standard out if scanner debugging is enabled in the {@link ParserProperties}.
     *
     * @return The token that was read.
     * @throws ParseException If reading the token fails.
     */
    @Override
    protected synchronized Token readToken() throws ParseException {
        final Token token = this.readTokenInternal();
        final boolean debugging = ParserProperties.should(
                ParserProperties.ENABLE_SCANNER_DEBUGGING);
        if (debugging) {
            System.out.println(token.toString());
        }
        return token;
    }
    
    
    
    /**
     * Reads the next token from the stream using a hand-written state machine.
     * State 0 inspects the first character and either returns a one-character
     * token, delegates to a specialised reader (identifier, number, string,
     * channel, user, radix operator) or switches to a follow-up state which
     * decides between tokens sharing the same first character:
     * <pre>
     *  0  entry state, dispatches on the first character
     *  1  whitespace run, ends in a SEPERATOR token
     *  2  after '!'   : "!=" or "!"
     *  3  after '&lt;'   : "&lt;=" (continues in 16), "&lt;&lt;" or "&lt;"
     *  4  after '&gt;'   : "&gt;=" or "&gt;"
     *  5  after '='   : "==" or "=&gt;", anything else is reported as illegal symbol
     *  6  after '|'   : "||" or "|"
     *  7  after '&amp;'   : "&amp;&amp;", "&amp;|" or "&amp;"
     *  8  after '.'   : "..", a number like ".5" or "."
     *  9  after '+'   : "+~", "+=" or "+"
     * 10  after '-'   : "-&gt;", "-=" or "-"
     * 12  after '?'   : "?!" or "?"
     * 13  after '\'   : "\(" or an escaped token
     * 14  after '^'   : "^^", "^T" or "^"
     * 15  after '/'   : "//" or "/"
     * 16  after "&lt;=" : "&lt;=&gt;" or "&lt;="
     * </pre>
     * State number 11 is not used.
     *
     * @return The next token. An EOS token is returned when the end of the stream
     *          is reached.
     * @throws ParseException If the input contains a lexical error.
     */
    protected final Token readTokenInternal() throws ParseException {
        int state = 0;
        int tokenStart = this.getStreamIndex();
        // collects the whitespace characters of a SEPERATOR token (state 1)
        StringBuilder currentString = new StringBuilder();
        
        while (!this.eos()) {
            if (state == 0) {
                int next = this.readChar();
                
                if (next == -1) {
                    final Position pos = new Position(tokenStart, 
                        this.getStreamIndex() + 1);
                    return new Token(TokenType.EOS, pos);
                }
                
                if (Character.isWhitespace(next)) {
                    if (!this.skipWhiteSpaces) {
                        // re-read the whitespace in state 1 so it becomes part of
                        // the SEPERATOR token
                        this.pushBack(next);
                        state = 1;
                    } else {
                        // skipping whitespaces, so move token start
                        ++tokenStart;
                    }
                    
                } else if (InputScanner.isIdentifierStart(next)) {
                    this.pushBack(next);
                    return this.readIdentifier();
                    
                } else if (next == '0') {
                    /*
                     * '0' cannot start a number, but starts the 0x: Operator
                     */
                    // note: the '0' is not pushed back; readRadixOperator accounts
                    // for it and falls back to readNumber if no 'x' follows
                    return this.readRadixOperator();
                } else if (Character.isDigit(next)) {
                    this.pushBack(next);
                    return this.readNumber();
                    
                } else if (next == '"') {
                    this.pushBack(next);
                    return this.readString();
                    
                } else if (next == '#') {
                	return this.readChannel();
                	
                } else if (next == '+') {
                    state = 9;
                } else if (next == '-') {
                    state = 10;
                } else if (next == '.') {
                    state = 8;
                } else if (next == '@') {
                    return this.readUser();
                } else if (next == ',') {
                    return new Token(TokenType.COMMA, this.spanFrom(tokenStart), ","); //$NON-NLS-1$
                } else if (next == '*') {
                    return new Token(TokenType.MUL, this.spanFrom(tokenStart), "*"); //$NON-NLS-1$
                } else if (next == '/') {
                    state = 15;
                } else if (next == '\\') {
                    state = 13;
                } else if (next == '%') {
                    return new Token(TokenType.MOD, this.spanFrom(tokenStart), "%"); //$NON-NLS-1$
                } else if (next == '$') {
                    return new Token(TokenType.DOLLAR, this.spanFrom(tokenStart),"$"); //$NON-NLS-1$
                } else if (next == '^') {
                    state = 14;
                } else if (next == '!') {
                    state = 2;
                } else if (next == '(') {
                    return new Token(TokenType.OPENBR, this.spanFrom(tokenStart), "("); //$NON-NLS-1$
                } else if (next == ')') {
                    return new Token(TokenType.CLOSEDBR, this.spanFrom(tokenStart), ")"); //$NON-NLS-1$
                } else if (next == '[') {
                    return new Token(TokenType.OPENSQBR, this.spanFrom(tokenStart), "["); //$NON-NLS-1$
                } else if (next == ']') {
                    return new Token(TokenType.CLOSEDSQBR, this.spanFrom(tokenStart), "]"); //$NON-NLS-1$
                } else if (next == '{') {
                    return new Token(TokenType.OPENCURLBR, this.spanFrom(tokenStart), "{"); //$NON-NLS-1$
                } else if (next == '}') {
                    return new Token(TokenType.CLOSEDCURLBR, this.spanFrom(tokenStart), "}"); //$NON-NLS-1$
                } else if (next == '?') {
                    state = 12;
                } else if (next == '~') {
                    return new Token(TokenType.WAVE, this.spanFrom(tokenStart), "~"); //$NON-NLS-1$
                } else if (next == ';') {
                    return new Token(TokenType.SEMICOLON, this.spanFrom(tokenStart), ";"); //$NON-NLS-1$
                } else if (next == '&') {
                    state = 7;
                } else if (next == '|') {
                    state = 6;
                } else if (next == ':') {
                    return new Token(TokenType.COLON, this.spanFrom(tokenStart), ":"); //$NON-NLS-1$
                } else if (next == '=') {
                    state = 5;
                } else if (next == '<') {
                    state = 3;
                } else if (next == '>') {
                    state = 4;
                } else {
                    return this.parseException(
                        Problems.format(Problems.ILLEGAL_SYMBOL, (char) next),
                        tokenStart);
                }
                    
            } else if (state == 1) {
                // whitespace run: collect until the first non-whitespace character
                int next = this.readChar();
                
                if (!Character.isWhitespace(next)) {
                    this.pushBack(next);
                    return new Token(TokenType.SEPERATOR, this.spanFrom(tokenStart), 
                            currentString.toString());
                } else {
                    currentString.appendCodePoint(next);
                }
                
            } else if (state == 2) {
                // read '!'
                int next = this.readChar();
                
                if (next == '=') {
                    return new Token(TokenType.NEQ, this.spanFrom(tokenStart), "!="); //$NON-NLS-1$
                } else {
                    this.pushBack(next);
                    return new Token(
                            TokenType.EXCLAMATION, this.spanFrom(tokenStart), "!"); //$NON-NLS-1$
                }
                
            } else if (state == 3) {
                // read '<'
                int next = this.readChar();
                
                if (next == '=') {
                    // "<=" may still become "<=>", decided in state 16
                    state = 16;
                } else if (next == '<') {
                    return new Token(
                        TokenType.LEFT_SHIFT, this.spanFrom(tokenStart), "<<"); //$NON-NLS-1$
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.LT, this.spanFrom(tokenStart), "<"); //$NON-NLS-1$
                }
                
            } else if (state == 4) {
                // read '>'
                int next = this.readChar();
                
                if (next == '=') {
                    return new Token(TokenType.EGT, this.spanFrom(tokenStart), ">="); //$NON-NLS-1$
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.GT, this.spanFrom(tokenStart), ">"); //$NON-NLS-1$
                }
                
            } else if (state == 5) {
                // read '=': unlike the other operator states there is no
                // single-character token, so any other follower is an error
                int next = this.readChar();
                
                if (next == '=') {
                    return new Token(TokenType.EQ, this.spanFrom(tokenStart), "==");
                } else if (next == '>') {
                        return new Token(TokenType.IMPLICATION, 
                                this.spanFrom(tokenStart), "=>");
                } else {
                    return this.parseException(
                        Problems.format(Problems.ILLEGAL_SYMBOL, (char) next),
                        tokenStart, next);
                }
                
            } else if (state == 6) {
                // read '|'
                int next = this.readChar();
                
                if (next == '|') {
                    return new Token(TokenType.BOOLEAN_OR, this.spanFrom(tokenStart), "||"); //$NON-NLS-1$
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.INT_OR, this.spanFrom(tokenStart), "|"); //$NON-NLS-1$
                }
                
            } else if (state == 7) {
                // read '&'
                int next = this.readChar();
                
                if (next == '&') {
                    return new Token(TokenType.BOOLEAN_AND, this.spanFrom(tokenStart), "&&"); //$NON-NLS-1$
                } else if (next == '|') {
                    return new Token(TokenType.AND_OR, this.spanFrom(tokenStart), "&|"); //$NON-NLS-1$
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.INT_AND, this.spanFrom(tokenStart), "&"); //$NON-NLS-1$
                }
                
            } else if (state == 8) {
                // read '.'
                int next = this.readChar();
                
                if (next == '.') {
                    return new Token(TokenType.DOTDOT, this.spanFrom(tokenStart), ".."); //$NON-NLS-1$
                } else if (Character.isDigit(next)) {
                    // a number without integer part: hand '0', '.' and the digit
                    // back to the stream and let readNumber scan it
                    this.pushBackArtificial('0');
                    this.pushBack('.');
                    this.pushBack(next);
                    return this.readNumber();
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.DOT, this.spanFrom(tokenStart), "."); //$NON-NLS-1$
                }
            } else if (state == 9) {
                // read '+'
                int next = this.readChar();
                
                if (next == '~') {
                    return new Token(TokenType.ADDWAVE, this.spanFrom(tokenStart), "+~"); //$NON-NLS-1$
                } else if (next == '=') {
                    return new Token(TokenType.ADDEQUALS, this.spanFrom(tokenStart), 
                        "+="); //$NON-NLS-1$
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.ADD, this.spanFrom(tokenStart), "+"); //$NON-NLS-1$
                }
                
            } else if (state == 10) {
                // read '-'
                int next = this.readChar();
                
                if (next == '>') {
                    return new Token(TokenType.ASSIGNMENT, this.spanFrom(tokenStart), "->"); //$NON-NLS-1$
                } else if (next == '=') {
                    return new Token(TokenType.SUBEQUALS, this.spanFrom(tokenStart), 
                        "-="); //$NON-NLS-1$
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.SUB, this.spanFrom(tokenStart), "-"); //$NON-NLS-1$
                }
            } else if (state == 12) {
                // read '?'
                int next = this.readChar();
                
                if (next == '!') {
                    return new Token(TokenType.QUEST_EXCALAMTION, 
                        this.spanFrom(tokenStart), "?!"); //$NON-NLS-1$
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.QUESTION, this.spanFrom(tokenStart), "?"); //$NON-NLS-1$
                }
            } else if (state == 13) {
                // read '\'
                int next = this.readChar();
                
                if (next == '(') {
                    return new Token(TokenType.LAMBDA, this.spanFrom(tokenStart), "\\("); //$NON-NLS-1$
                } else {
                    // anything else: scan the following token as usual and wrap it
                    // in an EscapedToken which spans the backslash as well
                    this.pushBack(next);
                    final Token escaped = this.readToken();
                    return new EscapedToken(this.spanFrom(tokenStart), escaped);
                }
            } else if (state == 14) {
                // read '^'
                int next = this.readChar();
                
                if (next == '^') {
                    return new Token(TokenType.XOR, this.spanFrom(tokenStart), "^^"); //$NON-NLS-1$
                } else if (next == 'T') {
                    return new Token(TokenType.TRANSPOSE, this.spanFrom(tokenStart), "^T"); //$NON-NLS-1$
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.POWER, this.spanFrom(tokenStart), "^"); //$NON-NLS-1$
                }
            } else if (state == 15) {
                // read '/'
                int next = this.readChar();
                
                if (next == '/') {
                    return new Token(TokenType.INTDIV, this.spanFrom(tokenStart), "//"); //$NON-NLS-1$
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.DIV, this.spanFrom(tokenStart), "/"); //$NON-NLS-1$
                }
            } else if (state == 16) {
                // read "<="
                int next = this.readChar();
                
                if (next == '>') {
                    return new Token(TokenType.EQUIVALENCE, this.spanFrom(tokenStart), "<=>");
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.ELT, this.spanFrom(tokenStart), "<=");
                }
            } else {
                throw new IllegalStateException("unhandled state: " + state); //$NON-NLS-1$
            }
        }
        
        // the loop was left because eos() reported the end of the stream
        return new Token(TokenType.EOS, new Position(tokenStart - 1, tokenStart));
    }
    
    
    
    /**
     * Reads the 'radix' operator which changes the representation of a number into
     * a number system with the given radix. The operator has the form
     * {@code 0x<radix>:} where {@code <radix>} is a decimal number. The leading
     * {@code '0'} has already been consumed by the caller. If it is not followed by
     * an {@code 'x'}, the input is an ordinary number and is handed to
     * {@link #readNumber()}.
     * 
     * <p>Any character other than a digit or the closing {@code ':'} within the
     * radix is reported as {@link Problems#INVALID_0X} instead of being skipped
     * silently.</p>
     * 
     * @return A {@link Token} which contains the radix in {@link Token#getLongValue()}.
     * @throws ParseException If a lexical error appears.
     */
    private Token readRadixOperator() throws ParseException {
        int tokenStart = this.getStreamIndex() - 1; // include the skipped '0'
        int state = 0;
        int radix = 0;
        
        while (!this.eos()) {
            if (state == 0) {
                // expect the 'x' of "0x"
                int next = this.readChar();
                
                if (next != 'x') {
                    // plain number with a leading zero: restore input, scan number
                    this.pushBack('0');
                    this.pushBack(next);
                    return this.readNumber();
                } else {
                    state = 1;
                }
            } else if (state == 1) {
                // at least one digit must follow "0x"
                int next = this.readChar();
                
                if (Character.isDigit(next)) {
                    this.pushBack(next);
                    state = 2;
                } else {
                    return this.parseException(
                        Problems.format(Problems.MISSING_RADIX),
                            tokenStart, next);
                }
            } else if (state == 2) {
                // accumulate the radix until the closing ':'
                int next = this.readChar();
                
                if (Character.isDigit(next)) {
                    // saturate instead of overflowing, so that an absurdly long
                    // radix can not wrap around into the valid range
                    radix = (int) Math.min(Integer.MAX_VALUE, 
                        radix * 10L + Character.digit(next, 10));
                } else if (next == ':') {
                    
                    if (radix > Character.MAX_RADIX) {
                        return this.parseException(
                            Problems.format(Problems.HIGH_RADIX, radix, 
                                Character.MAX_RADIX),
                            tokenStart);
                    }
                    
                    return new Token(TokenType.RADIX, this.spanFrom(tokenStart), radix);
                } else if (next != -1) {
                    // neither digit nor ':': the operator is malformed. End of
                    // input (-1) is left to the loop condition as before.
                    return this.parseException(
                        Problems.format(Problems.INVALID_0X), tokenStart, next);
                }
            }
        }
        
        return this.parseException(Problems.format(Problems.INVALID_0X), tokenStart);
    }
    
    
    
    /**
     * Scans a String literal. The literal is opened by a {@code "} and closed by
     * the next {@code "} which is not part of an escape sequence; the quotes
     * themselves are not part of the token's value. A backslash introduces an
     * escape sequence which is resolved by
     * {@link #readEscapeSequence(StringBuilder)}. If an invalid escape sequence
     * was encountered, the problem reported for the last one is returned once the
     * closing quote has been read.
     * 
     * @return A new String Token.
     * @throws ParseException If no closing quotes could be found.
     */
    private Token readString() throws ParseException {
        final int tokenStart = this.getStreamIndex();
        final StringBuilder content = new StringBuilder();
        Token pendingEscapeError = null;

        if (!this.eos()) {
            // the literal has to start with the opening quote
            final int opening = this.readChar();
            if (opening != '"') {
                return this.parseException(Problems.format(Problems.INVALID_STRING), 
                    tokenStart, opening);
            }

            while (!this.eos()) {
                final int charStart = this.getStreamIndex();
                final int next = this.readChar();

                if (next == -1) {
                    // HACK: to avoid errors if closing quotes are missing
                    //       This is subject to ISSUE: 0000022
                    break;
                }

                if (next == '"') {
                    // closing quote, which is not appended to the literal
                    if (pendingEscapeError != null) {
                        return pendingEscapeError;
                    }
                    return new Token(TokenType.STRING, this.spanFrom(tokenStart), 
                            content.toString());
                }

                if (next != '\\') {
                    content.appendCodePoint(next);
                    continue;
                }

                if (!this.readEscapeSequence(content)) {
                    // consume the character that did not form a valid escape
                    // sequence and remember the problem
                    final int invalid = this.readChar();
                    pendingEscapeError = this.parseException(
                        Problems.format(Problems.INVALID_ESCAPE, (char) invalid), 
                        charStart);
                }
            }
        }

        this.pushBack(-1);
        return this.parseException(Problems.format(Problems.UNCLOSED_STRING), tokenStart);
    }
    
    
    
    /**
     * Resolves the escape sequence that follows a backslash within a String
     * literal. The backslash itself was already consumed by {@link #readString()}.
     * Supported sequences are {@code \"} (quote), {@code \n} (the system's line
     * separator) and {@code \\} (backslash).
     *
     * @param lexem The builder to which the resolved character(s) are appended.
     * @return {@code true} if a valid escape sequence was read. {@code false} if
     *          the stream has ended or the next character forms no valid sequence;
     *          in the latter case that character is pushed back.
     * @throws ParseException As declared; not raised directly by this method.
     */
    private boolean readEscapeSequence(StringBuilder lexem) throws ParseException {
        if (this.eos()) {
            return false;
        }

        final int escaped = this.readChar();
        switch (escaped) {
        case '"':
            lexem.append('"');
            return true;
        case 'n':
            lexem.append(System.lineSeparator());
            return true;
        case '\\':
            lexem.append('\\');
            return true;
        default:
            this.pushBack(escaped);
            return false;
        }
    }
    
    
    
    /**
     * Reads a channel literal. The leading {@code '#'} has already been consumed by
     * the caller. It must be followed by at least one identifier-part character or
     * {@code '-'}; the literal then extends over all following characters of that
     * kind. The resulting token's value is the channel name including the
     * {@code '#'}.
     *
     * @return A new channel Token.
     * @throws ParseException If no valid channel name follows the {@code '#'}.
     */
    private Token readChannel() throws ParseException {
        // include the '#' sign, which is part of the token's value as well
        int tokenStart = this.getStreamIndex() - 1;
        StringBuilder lexem = new StringBuilder();

        int state = 0;
        while (!this.eos()) {
            int next = this.readChar();
            
            switch (state) {
            case 0:
                // the first character must be a valid channel character; it is
                // pushed back and collected in state 1
                if (InputScanner.isIdentifierPart(next) || next == '-') {
                    this.pushBack(next);
                    state = 1;
                } else {
                    return this.parseException(Problems.format(Problems.INVALID_CHANNEL, lexem), 
                            tokenStart);
                }
                break;
            case 1:
                
                if (InputScanner.isIdentifierPart(next) || next == '-') {
                    lexem.appendCodePoint(next);
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.CHANNEL, 
                            this.spanFrom(tokenStart), "#" + lexem.toString()); //$NON-NLS-1$
                }
                break;
            }
        
        }
        
        return this.parseException(Problems.format(Problems.INVALID_CHANNEL, lexem), 
            tokenStart);
    }
    
    
    
    /**
     * Reads a user literal. The leading {@code '@'} has already been consumed by
     * the caller; it is covered by the token's span but is not part of its value.
     * A user name consists of identifier-part characters, {@code '['}, {@code ']'},
     * {@code ':'} and {@code '-'}.
     *
     * <p>ISSUE: 0000031 - user literals can contain {@code '-'} which interferes
     * with a following assignment operator ({@code "->"}). Therefore a {@code '-'}
     * is only added to the name after a lookahead has shown that no {@code '>'}
     * follows. Otherwise both characters are pushed back and the literal ends.</p>
     *
     * @return A new user Token.
     * @throws ParseException If the stream ends before the literal is terminated.
     */
    private Token readUser() throws ParseException {
        final int tokenStart = this.getStreamIndex() - 1; // include @ sign
        final StringBuilder name = new StringBuilder();
        // true while a '-' was read but not yet attributed to name or operator
        boolean dashPending = false;

        while (!this.eos()) {
            final int next = this.readChar();

            if (dashPending) {
                if (next == '>') {
                    // assignment operator follows: give it back and finish
                    this.pushBack('-');
                    this.pushBack('>');
                    return new Token(TokenType.USER, 
                        this.spanFrom(tokenStart), name.toString());
                }
                // the dash belongs to the name; re-read the current character
                name.append("-"); //$NON-NLS-1$
                this.pushBack(next);
                dashPending = false;
                continue;
            }

            final boolean nameChar = InputScanner.isIdentifierPart(next) 
                    || next == '[' || next == ']' || next == ':';
            if (nameChar) {
                name.appendCodePoint(next);
            } else if (next == '-') {
                dashPending = true;
            } else {
                this.pushBack(next);
                return new Token(TokenType.USER, 
                        this.spanFrom(tokenStart), name.toString());
            }
        }

        return this.parseException(Problems.format(Problems.INVALID_USER, name), 
            tokenStart);
    }
    
    
    
    /**
     * Reads an identifier from the stream. An identifier starts with an
     * identifier-start character and continues with any number of identifier-part
     * characters. The first character that does not belong to the identifier is
     * pushed back. Before a Token is returned, 
     * {@link #identifierToToken(String, int)} checks whether the identifier is a
     * reserved keyword.
     * 
     * <p>ISSUE: 0000027 - the first character is checked separately against the
     * identifier-start rule, all further ones against the identifier-part rule.</p>
     * 
     * @return An identifier Token or a keyword Token.
     * @throws ParseException If any invalid character occurs.
     */
    private Token readIdentifier() throws ParseException {
        final int tokenStart = this.getStreamIndex();
        final StringBuilder name = new StringBuilder();

        if (!this.eos()) {
            final int first = this.readChar();
            if (!InputScanner.isIdentifierStart(first)) {
                return this.parseException(Problems.format(Problems.INVALID_IDENTIFIER), 
                    tokenStart);
            }
            name.appendCodePoint(first);

            while (!this.eos()) {
                final int next = this.readChar();
                if (!InputScanner.isIdentifierPart(next)) {
                    // end of identifier reached
                    this.pushBack(next);
                    return this.identifierToToken(name.toString(), tokenStart);
                }
                name.appendCodePoint(next);
            }
        }

        return this.parseException(Problems.format(Problems.INVALID_IDENTIFIER2, name), 
            tokenStart);
    }
    
    
    
    /**
     * Turns a scanned identifier into a Token. If the identifier is registered in
     * {@link #keywords}, a Token of the registered type (without String value) is
     * created. Identifiers registered as {@link TokenType#UNKNOWN} are rejected.
     * Everything else becomes an identifier Token.
     * 
     * @param string The String, representing the identifier.
     * @param tokenStart The stream index where this token begins.
     * @return An Identifier Token or a reserved keyword token.
     * @throws ParseException If {@code string} is an invalid identifier.
     */
    private Token identifierToToken(String string, int tokenStart) throws ParseException {
        final TokenType reserved = this.keywords.get(string);

        if (reserved == TokenType.UNKNOWN) {
            return this.parseException(
                Problems.format(Problems.INVALID_IDENTIFIER2, string), 
                tokenStart);
        }
        if (reserved != null) {
            return new Token(reserved, this.spanFrom(tokenStart));
        }
        return new Token(TokenType.IDENTIFIER, this.spanFrom(tokenStart), string);
    }
    
        
    
    /*
     * States for this method:
     * 0: entry state
     * 1: read at least one number and a dot
     * 3: read at least one number and a dot and know that there is at least one number
     *    to come.
     * 4: read a float or int literal followed by a '°' (degree character)
     * 5: read the beginning of a date: a number, a dot, a number, a dot
     * 6: 
     */
    
    /**
     * Main function to read all kinds of literals which start with numbers.
     * 
     * It calls {@link #readTimeSpan(int, int)} if it assumes that this is a timespan,
     * or {@link #readTime(int, int, boolean)} if it assumes that this is a time. If it
     * is a normal number (int or float), this method reads it to the end.
     * 
     * @return The read token.
     * @throws ParseException If the read characters form no valid Number or 
     *      DateTime-Token.
     */
    private Token readNumber() throws ParseException {
        int tokenStart = this.getStreamIndex();
        int state = 0;
        int firstPart = 0;  // first part of a time or a date. Also used as radix when
                            // reading a radixed integer literal
        int secondPart = 0; // second part of a date (months)
        int thirdPart = 0;  // year-part of a date
        int tmp = 0;        // first part of a time, if read after a date
        Token timeToken = null;//new Token(TokenType.DATETIME, this.spanFrom(0), new Date());    
        double dec = 1;
        double value = 0.0;
        double exp = 0.0;
        double exp_sign = 1.0;

        
        while (!this.eos()) {
            if (state == 0) {
                int next = this.readChar();
                
                if (Character.isDigit(next)) {
                    value = value * 10 + Character.digit(next, 10);
                    firstPart = firstPart * 10 + Character.digit(next, 10);
                } else if (next == '#') {
                    return this.readRadixedInteger(tokenStart, firstPart);
                } else if (InputScanner.isTimeLiteralChar(next)) {
                    this.pushBack(next);
                    return this.readTimeSpan(firstPart, tokenStart);
                } else if (next == ':') {
                    return this.readTime(firstPart, tokenStart, true);
                } else if (next == '.') {
                    state = 1;
                } else if (next == '°') {  // degree character
                    state = 4;
                } else if (next == 'E' || next == 'e') {
                    state = 9;
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), value);
                }
                
            } else if (state == 1) {
                int next = this.readChar();
                
                if (Character.isDigit(next)) {
                    this.pushBack(next);
                    state = 3;
                } else if (next == '.') {
                    this.pushBack('.');
                    this.pushBack('.');
                    return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), value);
                } else {
                    return this.parseException(Problems.format(Problems.MISSING_DECIMALS), 
                        tokenStart, next);
                }
                
            } else if (state == 3) {
                int next = this.readChar();

                if (Character.isDigit(next)) {
                    dec *= 0.1;
                    value += (double) Character.digit(next, 10) * dec;
                    secondPart = secondPart * 10 + Character.digit(next, 10);
                } else if (next == '.') {
                    /* Till now we read the beginning of a date literal missing the
                     * year: xx.xx.
                     * or this might be a decimal number followed by a dotdot operator
                     * if the next char is a '.'
                     * In the latter case, we return the so far read number and pushback 
                     * two dots.
                     */
                    state = 5;
                } else if (next == 'E' || next == 'e') {
                    state = 9;
                } else if (next == '°') { //degree character
                    state = 4;
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), value);
                }
                
            } else if (state == 4) {
                return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), 
                        Math.toRadians(value));
                
            } else if (state == 5) {
                int next = this.readChar();
                
                /* This is no Date Literal, but a decimal number followed by a
                 * dotdot operator. So pushback the two dots and return the number.
                 */
                if (next == '.') {
                    this.pushBack('.');
                    this.pushBack('.');
                    return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), value);
                }
                
                // HACK: Need to ensure that at least one number has been read before 
                //       reading on.
                if (firstPart > 31 || secondPart > 12) {
                    return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), 
                        tokenStart);
                }
                
                if (Character.isDigit(next)) {
                    thirdPart = thirdPart * 10 + Character.digit(next, 10);
                } else if (next == '@') {
                    state = 6;
                } else {
                    this.pushBack(next);
                    Calendar c = Calendar.getInstance();
                    c.set(Calendar.DAY_OF_MONTH, firstPart);
                    c.set(Calendar.MONTH, secondPart);
                    c.set(Calendar.YEAR, thirdPart);
                    timeToken = new Token(TokenType.DATETIME, this.spanFrom(tokenStart), 
                        c.getTime());
                    state = 8;
                }                

            } else if (state == 6) {
                int next = this.readChar();
                
                if (Character.isDigit(next)) {
                    this.pushBack(next);
                    state = 7;
                } else {
                    return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), 
                        tokenStart, next);
                }
            } else if (state == 7) {
                int next = this.readChar();
                
                if (Character.isDigit(next)) {
                    tmp = tmp * 10 + Character.digit(next, 10);
                } else if (next == ':') {
                    timeToken = this.readTime(tmp, tokenStart, false);
                    if (timeToken.matches(TokenType.ERROR)) {
                        return timeToken;
                    }
                    state = 8;
                } else {
                    return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), 
                        tokenStart, next);
                }
                
            } else if (state == 8) {
                Calendar c = Calendar.getInstance();
                if (thirdPart < 100) {
                    final int year = c.get(Calendar.YEAR);
                    final int millenium = year - year % 1000;
                    thirdPart += millenium;
                }
                if (thirdPart > 9999) {
                    return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), 
                        tokenStart);
                }
                
                // CONSIDER ISSUE 0000115
                c.setTime(timeToken.getDateValue());
                c.set(thirdPart, secondPart - 1, firstPart);
                return new Token(TokenType.DATETIME, this.spanFrom(tokenStart), 
                        c.getTime());
                
            } else if (state == 9) {
                int next = this.readChar();
                
                if (Character.isDigit(next)) {
                    this.pushBack(next);
                    state = 10;
                } else if (next == '-') {
                    exp_sign = -1.0;
                    state = 10;
                } else if (next == '+') {
                    exp_sign = 1.0;
                    state = 10;
                } else {
                    return this.parseException(Problems.format(Problems.INVALID_NUMBER), 
                        tokenStart, next);
                }
                
            } else if (state == 10) {
                int next = this.readChar();
                
                if (Character.isDigit(next)) {
                    exp = exp * 10 + Character.digit(next, 10);
                } else {
                    this.pushBack(next);
                    
                    // HACK: Ensure that at least one number has been read
                    if (exp == 0.0) {
                        return this.parseException(
                            Problems.format(Problems.INVALID_NUMBER), tokenStart);
                    }
                    value = value * Math.pow(10, exp * exp_sign);
                    return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), value);
                }
            } // state
        } // while
        
        throw new IllegalStateException("should not be reachable"); //$NON-NLS-1$
    }
    
    

    /**
     * Reads the remainder of a time span literal such as {@code 1d4h10m}. The caller
     * has already consumed the digits in front of the first unit character and
     * passes them as {@code value}; the first unit character itself is expected to 
     * still be in the stream.
     * 
     * <p>Every unit character (see {@link #isTimeLiteralChar(int)}) may occur only 
     * once. The amounts are summed up using the factors of 
     * {@link #timeLiteralValue(int)}.</p>
     * 
     * <p>An error token is returned if a unit occurs twice, if digits are followed 
     * by a character that is neither a digit nor a unit, if the sum does not fit
     * into an {@code int}, or if the stream ends while still reading.</p>
     * 
     * @param value The number that was read in front of the first unit character.
     * @param tokenStart The beginning index of the currently read token.
     * @return A TIMESPAN token carrying the summed value, or an ERROR token.
     * @throws ParseException Declared for the stream operations used while reading.
     */
    private Token readTimeSpan(int value, int tokenStart) throws ParseException {
        final Set<Integer> seenUnits = new TreeSet<Integer>();
        int state = 0;
        // Accumulate in long so that an overflow of the int result can be detected
        // instead of silently wrapping around.
        long pending = value;
        long total = 0;
        
        while (!this.eos()) {
            if (state == 0) {
                // expecting a unit character, the start of a new amount or the end
                int next = this.readChar();
                
                if (InputScanner.isTimeLiteralChar(next)) {
                    if (seenUnits.contains(next)) {
                        return this.parseException(
                            Problems.format(Problems.INVALID_DATE_TIME), 
                            tokenStart);
                    }
                    seenUnits.add(next);
                    total += pending * InputScanner.timeLiteralValue(next);
                    pending = 0;
                    
                    if (total < 0 || total > Integer.MAX_VALUE) {
                        return this.parseException(
                            Problems.format(Problems.INVALID_DATE_TIME), 
                            tokenStart);
                    }
                } else if (Character.isDigit(next)) {
                    this.pushBack(next);
                    state = 1;
                } else {
                    this.pushBack(next);
                    final int result = (int) total;
                    return new Token(TokenType.TIMESPAN, this.spanFrom(tokenStart),
                        result);
                }
                
            } else if (state == 1) {
                // reading the digits of an amount; must be terminated by a unit
                int next = this.readChar();
                
                if (InputScanner.isTimeLiteralChar(next)) {
                    this.pushBack(next);
                    state = 0;
                } else if (Character.isDigit(next)) {
                    pending = pending * 10 + Character.digit(next, 10);
                    if (pending > Integer.MAX_VALUE) {
                        return this.parseException(
                            Problems.format(Problems.INVALID_DATE_TIME), 
                            tokenStart);
                    }
                } else {
                    // An amount without unit: previously this character was 
                    // silently dropped from the input.
                    return this.parseException(
                        Problems.format(Problems.INVALID_DATE_TIME), 
                        tokenStart, next);
                }
            }
        }
        
        return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), 
            tokenStart);
    }
    
    
    
    /**
     * <p>Reads the end of a time specification from a String. 'End' means, that the 
     * first part (the hour part) and the following colon must already have been 
     * read.</p>
     * 
     * <p>To determine if this is really a time rather than a normal number, the method
     * {@link #readNumber()} consumes numbers until it encounters a {@code colon}.
     * It then passes the so far read numbers to this method to read the rest of the 
     * time. If the next char encountered is no digit and {@code exceptNumber} is
     * set to {@code true}, this method restores both the colon and that char to the
     * stream and returns a Number-Token representing the so far read hour part. 
     * If set to {@code false}, an error token is returned instead.</p>
     * 
     * <p>The minute part consists of one or two digits and must not exceed 59, the
     * hour part must not exceed 23. The resulting date is based on the current date
     * with seconds and milliseconds set to zero.</p>
     * 
     * @param firstPart The so far read part of the time (the hour-part).
     * @param tokenStart The beginning index of the currently read token.
     * @param exceptNumber Determines if this method breaks up if it is not a completely
     *      valid time, or if it will return a Number-Token instead (see description 
     *      above!).
     * @return Most likely a DateTime-Token.
     * @throws ParseException If the read characters form no valid DateTime-Token.
     */
    private Token readTime(int firstPart, int tokenStart, 
            boolean exceptNumber) throws ParseException {
        int state = 0;
        int secondPart = 0;
        
        if (firstPart > 23) {
            return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), 
                tokenStart);
        }
        
        while (!this.eos()) {
            if (state == 0) {
                // first character after the colon
                int next = this.readChar();
                
                if (Character.isDigit(next)) {
                    state = 1;
                    secondPart = secondPart * 10 + Character.digit(next, 10);
                } else if (exceptNumber){
                    // Not a time after all: restore everything that follows the
                    // number. The character just read must go back first so that
                    // the colon is read again before it.
                    // NOTE(review): assumes pushBack works like a stack (last 
                    // pushed is read first) - confirm against AbstractTokenStream.
                    this.pushBack(next);
                    this.pushBack(':');
                    return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), 
                            (double) firstPart);
                } else {
                    return this.parseException(
                        Problems.format(Problems.INVALID_DATE_TIME), tokenStart, next);
                }
            } else if (state == 1) {
                // optional second digit of the minute part
                int next = this.readChar();
                
                if (!Character.isDigit(next)) {
                    pushBack(next);
                } else {
                    secondPart = secondPart * 10 + Character.digit(next, 10);
                }
                
                if (secondPart > 59) {
                    return this.parseException(
                        Problems.format(Problems.INVALID_DATE_TIME), tokenStart);
                }
                
                Calendar c = Calendar.getInstance();
                c.set(Calendar.HOUR_OF_DAY, firstPart);
                c.set(Calendar.MINUTE, secondPart);
                c.set(Calendar.SECOND, 0);
                // Also clear the milliseconds, otherwise the resulting date carries
                // the arbitrary millisecond fraction of the moment of scanning.
                c.set(Calendar.MILLISECOND, 0);

                return new Token(TokenType.DATETIME, this.spanFrom(tokenStart), 
                        c.getTime());
                
            }
        }
        
        return this.parseException(Problems.format(Problems.INVALID_DATE_TIME), 
            tokenStart);
    }
    
    
    
    /**
     * Reads the digits of an integer literal with explicit radix. The radix and the 
     * separating {@code #} must already have been consumed by the caller.
     * 
     * <p>At least one digit that is valid for the given radix (see 
     * {@link #isDigit(int, int)}) is required. Reading stops at the first character
     * which is no valid digit; that character is pushed back.</p>
     * 
     * <p>The value is accumulated as {@code double} (the type the resulting token
     * carries) so that literals exceeding the {@code int} range do not wrap around
     * to wrong or negative values.</p>
     * 
     * @param tokenStart The beginning index of the currently read token.
     * @param radix The radix of the literal. Values greater than 
     *          {@link Character#MAX_RADIX} yield an error token.
     * @return A NUMBER token holding the read value, or an ERROR token.
     * @throws ParseException Declared for the stream operations used while reading.
     */
    private Token readRadixedInteger(int tokenStart, int radix) throws ParseException {
        double value = 0.0;
        int state = 0;
        
        if (radix > Character.MAX_RADIX) {
            return this.parseException(
                Problems.format(Problems.HIGH_RADIX, radix, Character.MAX_RADIX), 
                tokenStart);
        }
        
        while (!this.eos()) {
            if (state == 0) {
                // the first character must be a valid digit
                int next = this.readChar();
                
                if (InputScanner.isDigit(next, radix)) {
                    this.pushBack(next);
                    state = 1;
                } else {
                    return this.parseException(
                        Problems.format(Problems.INVALID_RADIXED_INT), tokenStart);
                }
            } else if (state == 1) {
                int next = this.readChar();
                
                if (InputScanner.isDigit(next, radix)) {
                    value = value * radix + Character.digit(next, radix);
                } else {
                    this.pushBack(next);
                    return new Token(TokenType.NUMBER, this.spanFrom(tokenStart), 
                        value);
                }
            }
        }
        
        // stream ended before the literal was terminated
        return this.parseException(Problems.format(Problems.INVALID_RADIXED_INT), 
            tokenStart);
    }
    
    
    
    /**
     * Creates the token which represents a lexical error. The token's position is 
     * obtained from {@link #spanFrom(int)} for the given token start and the error 
     * message becomes the token's value.
     * 
     * @param errorMessage The parse error message.
     * @param tokenStart The beginning of the erroneous token.
     * @return A token with type {@link TokenType#ERROR}
     * @throws ParseException Declared in the signature; the statements of this 
     *          method do not throw it themselves.
     */
    protected Token parseException(String errorMessage, int tokenStart) 
            throws ParseException {
        return new Token(TokenType.ERROR, this.spanFrom(tokenStart), errorMessage);
    }
    
    
    
    /**
     * Creates the token which represents a lexical error caused by the character
     * {@code next}. The character is pushed back only temporarily while the 
     * position of the error is determined and is read again afterwards.
     * 
     * @param errorMessage The parse error message.
     * @param tokenStart The beginning of the erroneous token.
     * @param next The character that has been read last.
     * @return A token with type {@link TokenType#ERROR}
     * @throws ParseException Declared in the signature for the stream operations 
     *          used.
     */
    protected Token parseException(String errorMessage, int tokenStart, int next) 
            throws ParseException {
        // determine the span as if 'next' had not been read yet ...
        this.pushBack(next);
        final Position errorSpan = this.spanFrom(tokenStart);
        
        // ... then read it again
        this.readChar();
        
        return new Token(TokenType.ERROR, errorSpan, errorMessage);
    }
    
    
    
    /**
     * Tests whether a code point is a digit in the numeral system with the given 
     * radix. For radix 16 for example, the valid symbols are 
     * <code>0123456789ABCDEFabcdef</code>. The decision is delegated to 
     * {@link Character#digit(int, int)}.
     *  
     * @param c The character to test.
     * @param radix The radix.
     * @return <code>true</code> iff the char is a valid symbol for the given radix.  
     */
    protected static boolean isDigit(int c, int radix) {
        final int digitValue = Character.digit(c, radix);
        // Character.digit yields -1 for everything that is no digit
        return digitValue >= 0;
    }
    
    
    
    /**
     * Determines whether the given codepoint is a valid part of a polly identifier.
     * That is every codepoint accepted by 
     * {@link Character#isJavaIdentifierPart(int)}, except the dollar sign.
     * 
     * @param token The character to check.
     * @return <code>true</code> if it is a valid identifier part.
     */
    protected static boolean isIdentifierPart(int token) {
        if (token == '$') {
            return false;
        }
        return Character.isJavaIdentifierPart(token);
    }
    
    
    
    /**
     * Determines whether the given codepoint may be the first character of a polly 
     * identifier. That is every codepoint accepted by 
     * {@link Character#isJavaIdentifierStart(int)}, except the dollar sign.
     * 
     * @param token The character to check.
     * @return <code>true</code> if the character can start a polly identifier.
     */
    protected static boolean isIdentifierStart(int token) {
        if (token == '$') {
            return false;
        }
        return Character.isJavaIdentifierStart(token);
    }
    
    
    
    // Fixed ISSUE: 0000010: Added characters 'w' and 'y' for week and year.
    /**
     * Tests whether a character is one of the unit characters of a time span 
     * literal: {@code 'h' | 'm' | 's' | 'd' | 'w' | 'y'}.
     * 
     * @param token The lexical token to check.
     * @return Whether the token is a time modifier.
     */
    protected static boolean isTimeLiteralChar(int token) {
        switch (token) {
            case 'h':
            case 'm':
            case 's':
            case 'd':
            case 'w':
            case 'y':
                return true;
            default:
                return false;
        }
    }
    
    
    
    /**
     * Maps a time modifier character to the factor it stands for: 
     * {@code 's' = 1, 'm' = 60, 'h' = 3600, 'd' = 86400, 'w' = 604800, 'y' = 31536000}.
     * 
     * @param token TimeModifier character. 
     * @return The value the character represents in terms of a TimeModifier token.
     * @throws IllegalArgumentException If no valid time modifier char as determined by 
     *      {@link #isTimeLiteralChar(int)} is entered.
     */
    protected static int timeLiteralValue(int token) {
        if (token == 's') {
            return 1;
        } else if (token == 'm') {
            return 60;
        } else if (token == 'h') {
            return 3600;
        } else if (token == 'd') {
            return 86400;
        } else if (token == 'w') {
            return 604800;
        } else if (token == 'y') {
            return 31536000;
        }
        throw new IllegalArgumentException("No valid time modifier char."); //$NON-NLS-1$
    }
}