package me.august.lumen.compile.scanner; import me.august.lumen.common.Chars; import me.august.lumen.compile.scanner.tokens.ImportPathToken; import me.august.lumen.compile.scanner.tokens.NumberToken; import me.august.lumen.compile.scanner.tokens.StringToken; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.*; import java.util.function.Consumer; import static me.august.lumen.compile.scanner.Type.*; public class LumenScanner implements TokenSource { private static Map<String, Type> KEYWORD_MAP = new HashMap<>(); // initialize keyword map static { for (Type type : Type.values()) { if (type.getKeywords() == null) continue;; for (String keyword : type.getKeywords()) { KEYWORD_MAP.put(keyword, type); } } } private static Map<Type, Consumer<LumenScanner>> KEYWORD_HANDLERS = new HashMap<>(); private Reader reader; private int currentPosition; private int lastRecordedPosition; // in case we need to insert more tokens // before the next token is read private Stack<Token> queuedTokens = new Stack<>(); public LumenScanner(Reader reader) { this.reader = reader; } public LumenScanner(String source) { this.reader = new StringReader(source); } // ========================= // Local utility methods // ========================= private int read() { try { currentPosition++; return reader.read(); } catch (IOException e) { // revert previous increment if // reading fails currentPosition--; return -1; } } private int peek() { try { reader.mark(1); int peek = reader.read(); reader.reset(); return peek; } catch (IOException e) { return -1; } } private boolean accept(int chr) { if (peek() == chr) { read(); return true; } else { return false; } } private int advanceRecorder() { int recorded = lastRecordedPosition; lastRecordedPosition = currentPosition; return recorded; } private void consumeWhitespace() { while (peek() == ' ') read(); } private Token newToken(Type type) { return newToken(type, null); } private Token newToken(Type type, String source) { return new Token(source, advanceRecorder(), currentPosition, type); } @Override public Token nextToken() { if (!queuedTokens.empty()) return queuedTokens.pop(); while (true) { int chr = read(); switch (chr) { // End of file (or error) reached case -1: return newToken(EOF); // Bracket tokens case '(': return newToken(L_PAREN); case ')': return newToken(R_PAREN); case '{': return newToken(L_BRACE); case '}': return newToken(R_BRACE); case '[': return newToken(L_BRACKET); case ']': return newToken(R_BRACKET); case '"': case '\'': return nextStringLiteral((char) chr); // arithmetic operators case '+': return diffPlus(); case '-': return diffMin(); case '*': return newToken(MULT); case '/': return newToken(DIV); case '%': return newToken(REM); // punctuation case ',': return newToken(COMMA); case '.': return diffDots(); case ':': return diffColon(); case '>': return diffGT(); case '<': return diffLT(); case '|': return diffOr(); case '&': return diffAnd(); case '~': return newToken(BIT_COMP); case '!': return diffBang(); case '?': return newToken(QUESTION); case '=': return diffEq(); case '#': consumeComment(); case ' ' : case '\n': case '\r': case '\t': lastRecordedPosition = currentPosition; continue; default: { char c = (char) chr; if (Chars.isIdentifierStart(c)) { return nextIdentifier(c); } else if (Chars.isDigit(c)) { return nextNumber(c); } } } } } /** * Reads the next string literal * @param startQuote The starting quote type (" or ') * @return A StringToken with the string literal's content */ private StringToken nextStringLiteral(char startQuote) { // Contents of the string literal StringBuilder builder = new StringBuilder(); while (true) { int read = read(); if (read == startQuote) { break; } else if (read == '\\') { builder.append(nextStringEscapeSequence()); } else if (read == -1) { throw new IllegalStateException("Unexpected EOF in string literal"); } else { builder.append((char) read); } } StringToken.QuoteType quoteType = startQuote == '"' ? StringToken.QuoteType.DOUBLE : StringToken.QuoteType.SINGLE; return new StringToken( builder.toString(), quoteType, advanceRecorder(), currentPosition ); } /** * Gets the character corresponding to the next * escape sequence. * @return The next escape sequence's character value */ private char nextStringEscapeSequence() { int chr = read(); switch (chr) { // single-letter sequences case 'b': return '\b'; case 't': return '\t'; case 'n': return '\n'; case 'f': return '\f'; case 'r': return '\r'; // special characters case '\'': return '\''; case '\\': return '\\'; // unicode character case 'u': { int hex = 0; // read 4 hex digits (0-9, a-f, A-F) for (int i = 0; i < 4; i++) { int ord = read(); if (ord >= '0' && ord <= '9') { // set value to numeric value // of the character ord -= '0'; } else if (ord >= 'a' && ord <= 'f') { // add back 10 so a = 10 ord -= ('a' - 10); } else if (ord >= 'A' && ord <= 'F') { // add back 10 so A = 10 ord -= ('A' - 10); } else { throw new IllegalStateException( "Invalid hexadecimal digit: " + (char) ord ); } // append value to hex number hex = hex * 16 + ord; } return (char) hex; } // octal literal // maximum value 377 (255 dec) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { int ord = chr - '0'; if (ord <= 3) { int peek = peek(); for (int i = 0; i < 2; i++) { if (peek >= '0' && peek <= '7') { read(); ord = ord * 8 + (peek - '0'); peek = peek(); } } } return (char) ord; } default: throw new IllegalStateException( "Unexpeced start of escape sequence: " + chr ); } } private Token nextIdentifier(char first) { String identifier = nextPlainIdentifier(first); if (identifier.equals("is") || identifier.equals("isnt")) { Token next = nextToken(); if (next.getType() == IDENTIFIER && next.getContent().equals("a")) { Type type = identifier.equals("is") ? INSTANCEOF_KEYWORD : NOT_INSTANCEOF_KEYWORD; return newToken(type); } else { queuedTokens.push(next); } } if (KEYWORD_MAP.containsKey(identifier)) { Token tok = newToken(KEYWORD_MAP.get(identifier)); if (KEYWORD_HANDLERS.containsKey(tok.getType())) { KEYWORD_HANDLERS.get(tok.getType()).accept(this); } return tok; } return newToken(IDENTIFIER, identifier); } private String nextPlainIdentifier(char first) { return nextPlainIdentifier().insert(0, first).toString(); } private StringBuilder nextPlainIdentifier() { StringBuilder sb = new StringBuilder(); while (peek() > -1 && Chars.isIdentifierRest((char) peek())) { sb.append((char) read()); } return sb; } // Methods for differentiating tokens private Token diffPlus() { if (accept('+')) { return newToken(INC); // ++ } return newToken(PLUS); // + } private Token diffMin() { if (accept('-')) { return newToken(DEC); // -- } else if (accept('>')) { return newToken(R_ARROW); // -> } return newToken(MIN); // - } private Token diffDots() { if (accept('.')) { if (accept('.')) { return newToken(RANGE_INCLUSIVE); // ... } return newToken(RANGE_EXCLUSIVE); // .. } return newToken(DOT); // . } private Token diffColon() { if (accept(':')) { return newToken(SEP); // :: } return newToken(COLON); // : } private Token diffGT() { if (accept('=')) { return newToken(GTE); // >= } else if (accept('>')) { if (accept('>')) { return newToken(U_SH_R); // ">>>"; } return newToken(SH_R); // ">>"; } return newToken(GT); // ">" } private Token diffLT() { if (accept('=')) { return newToken(LTE); // "<=" } else if (accept('<')) { return newToken(SH_L); // "<<" } return newToken(LT); // "<" } private Token diffOr() { if (accept('|')) { return newToken(LOGIC_OR); // } return newToken(BIT_OR); } private Token diffAnd() { if (accept('&')) { return newToken(LOGIC_AND); } return newToken(BIT_AND); } private Token diffBang() { if (accept('=')) { return newToken(NE); // != } return newToken(NOT); // ! } private Token diffEq() { if (accept('=')) { return newToken(EQ); // == } return newToken(ASSIGN); // = } private Token nextNumber(char first) { StringBuilder sb = new StringBuilder(); int base = getPrefixBase(first); if (base != 16 && base != 2) { sb.append(first); } while (peek() > -1 && isValidChar((char) peek(), base)) { //(Chars.isDigit((char) peek()) || Chars.isAlpha((char) peek()))) { if (!isValidChar((char) peek(), base)) { throw new RuntimeException("Illegal digit: " + (char) peek()); } sb.append((char) read()); } if (peek() == '.') { sb.append((char) read()); int peek = peek(); while (peek > -1 && Chars.isDigit((char) peek)) { sb.append((char) read()); peek = peek(); } } if (peek() == 'e') { sb.append((char) read()); if (peek() == '+' || peek() == '-') { sb.append((char) read()); } while (Chars.isDigit((char) peek())) { sb.append((char) read()); } } Class<? extends Number> type = getSuffixType(); Number num = parseNumber(sb.toString(), base, type); return new NumberToken(num, advanceRecorder(), currentPosition); } // Numbers private int getPrefixBase(char chr) { if (chr > '9' || chr < '0') { return -1; } if (chr != '0') { return 10; } switch (peek()) { case 'x': case 'X': read(); return 16; case 'b': case 'B': read(); return 2; default: return 8; } } private Class<? extends Number> getSuffixType() { switch (peek()) { case 'f': case 'F': read(); return Float.class; case 'd': case 'D': read(); return Double.class; case 'l': case 'L': read(); return Long.class; default: return null; } } private boolean isValidChar(char chr, int radix) { return Character.digit(chr, radix) > -1; } private Number parseNumber(String num, int base, Class<? extends Number> type) { if (base != 10 && (num.contains(".") || num.contains("e"))) { throw new RuntimeException("Illegal base prefix"); } Number result; if (num.contains(".") || num.contains("e")) { result = Double.valueOf(num); } else { Long converted = Long.parseLong(num, base); if (converted <= Integer.MAX_VALUE && converted >= Integer.MIN_VALUE) { result = converted.intValue(); } else { result = converted; } } if (type == Double.class) { return result.doubleValue(); } else if (type == Float.class) { return result.floatValue(); } else if (type == Long.class) { if (result.getClass() == Long.class || result.getClass() == Integer.class) { return result.longValue(); } else { throw new RuntimeException("Illegal long suffix"); } } else { return result; } } private void consumeComment() { if (peek() == '*') { read(); // noinspection StatementWithEmptyBody while (!(read() == '*' && peek() == '#')); read(); } else { // noinspection StatementWithEmptyBody while (read() != '\n'); } } private void handleImport() { read(); // consume whitespace StringBuilder sb = new StringBuilder(); sb.append(nextPlainIdentifier()); List<String> nodes = null; boolean didEnd = false; while (peek() == '.') { sb.append((char) read()); if (didEnd) // TODO proper exception handling throw new RuntimeException("import statement already terminated"); if (accept('{')) { // once we reach a multi-import, it must be the end didEnd = true; consumeWhitespace(); nodes = new ArrayList<>(); nodes.add(nextPlainIdentifier().toString()); consumeWhitespace(); while (accept(',')) { consumeWhitespace(); nodes.add(nextPlainIdentifier().toString()); } // TODO proper exception handling if (read() != '}') throw new RuntimeException("Expected right brace: }"); } else { sb.append(nextPlainIdentifier()); } } String importPath = sb.toString(); if (nodes == null) { // grab last identifier after dot int lastIdx = importPath.lastIndexOf('.'); nodes = Arrays.asList(importPath.substring(lastIdx + 1)); importPath = importPath.substring(0, lastIdx); } queuedTokens.push(new ImportPathToken( sb.toString(), advanceRecorder(), lastRecordedPosition, importPath, nodes )); } static { KEYWORD_HANDLERS.put(Type.IMPORT_KEYWORD, LumenScanner::handleImport); } }