/******************************************************************************* * Copyright (c) 2012, 2012 IBM Corporation and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Bruno Medeiros - initial API and implementation *******************************************************************************/ package dtool.parser.common; import static melnorme.utilbox.core.Assert.AssertNamespace.assertNotNull; import static melnorme.utilbox.core.Assert.AssertNamespace.assertTrue; import dtool.parser.DeeTokens; import dtool.parser.DeeLexerErrors; public abstract class AbstractLexer { protected static final int EOF = -1; protected static final short ASCII_LIMIT = 127; protected final String source; protected int tokenStartPos = 0; protected int pos = 0; // Temporary variable. When a match is finished this will be token end position. protected DeeTokens tokenType; // type for the last matched token protected DeeLexerErrors tokenError; // error for the last matched token public AbstractLexer(String source) { this.source = assertNotNull(source); } public final String getSource() { return source; } public int getLexingPosition() { return pos; } /** Gets the character from absolute position index. */ public final int getCharacter(int index) { if(index >= source.length()) { return -1; } return source.charAt(index); } protected final int lookAhead(int offset) { return getCharacter(pos + offset); } protected final int lookAhead() { return getCharacter(pos); } public final void reset(int newTokenStartPosition) { assertTrue(newTokenStartPosition >= 0 && newTokenStartPosition <= source.length()); pos = newTokenStartPosition; tokenType = null; } public final Token next() { parseToken(); Token token = createParsedToken(); assertTrue(token.getEndPos() == pos); return token; } public void parseToken() { tokenType = null; tokenError = null; tokenStartPos = pos; doParseToken(); } protected Token createParsedToken() { String value = tokenType.hasSourceValue() ? tokenType.getSourceValue() : // Minor optimization here, don't allocate string for these token types source.substring(tokenStartPos, pos); if(tokenError != null) { return new Token.ErrorToken(tokenType, value, tokenStartPos, tokenError); } return new Token(tokenType, value, tokenStartPos); } protected abstract Void doParseToken(); protected final Void endMatchWithError(DeeTokens tokenType, DeeLexerErrors errorType) { this.tokenError = errorType; return endMatch(tokenType); } protected final Void endMatch(DeeTokens tokenType) { this.tokenType = tokenType; return null; } protected final Void matchTokenFromStartPos(DeeTokens tokenCode, int length) { pos = tokenStartPos + length; return endMatch(tokenCode); } /* ------------------------ Helpers ------------------------ */ /** Advance position until any of given strings is found, or input reaches EOF. * Returns the index in given strings array of the matched string (position is advanced to end of string), * or -1 if EOF was encountered (position is advanced to EOF). * If input can match more than one string, priority is given to string with lowest index in given strings, * so ordering is important. */ protected final int seekTo(final String[] strings) { while(true) { int i = 0; boolean matchesAny = false; for (; i < strings.length; i++) { matchesAny = inputMatchesSequence(strings[i]); if(matchesAny) { break; } } if(matchesAny) { pos += strings[i].length(); return i; } else if(lookAhead(0) == -1) { return -1; } else { pos++; } } } /** Optimization of {@link #seekTo(String[])} method for 1 String */ protected final int seekTo(String string) { while(true) { boolean matches = inputMatchesSequence(string); if(matches) { pos += string.length(); return 0; } else if(lookAhead() == -1) { return -1; } else { pos++; } } } /** Optimization of {@link #seekTo(String[])} method for 1 char */ protected final int seekTo(char endChar) { while(true) { int ch = lookAhead(0); if(ch == -1) { return -1; } pos++; if(ch == endChar) { return 0; } } } /** Optimization of {@link #seekTo(String[])} method for 2 char */ protected final int seekTo(char endChar1, char endChar2) { while(true) { int ch = lookAhead(); if(ch == EOF) { return EOF; } pos++; if(ch == endChar1) { return 0; } else if(ch == endChar2) { return 1; } } } protected final int seekToNewline() { while(true) { int ch = lookAhead(); if(ch == EOF) { return EOF; } pos++; if(ch == '\r') { if(lookAhead() == '\n') { pos++; } return 0; } else if(ch == '\n') { return 0; } } } /*---------------------------------------*/ protected final void readNewline() { int result = readNewlineOrEOF(); assertTrue(result == 0); } protected final int readNewlineOrEOF() { int ch = lookAhead(); if(ch == '\r') { pos++; if(lookAhead() == '\n') { pos++; } return 0; } else if(ch == '\n') { pos++; return 0; } else if(ch == EOF){ return 1; } else { return -1; } } /*---------------------------------------*/ static { assertTrue( ((int)-1) != ((char)-1) ); } // inputMatchesSequence relies on this /** Returns true if the sequence from current position matches given string. */ protected final boolean inputMatchesSequence(CharSequence string) { int length = string.length(); for (int i = 0; i < length; i++) { int ch = lookAhead(i); if(ch != string.charAt(i)) { return false; } } return true; } /** Optimization of {@link #inputMatchesSequence(CharSequence)} , since String is final and not an interface */ protected final boolean inputMatchesSequence(String string) { int length = string.length(); for (int i = 0; i < length; i++) { int ch = lookAhead(i); if(ch != string.charAt(i)) { return false; } } return true; } /* ------------------------ ------------------------ */ protected final Void rule3Choices(char ch1, DeeTokens tk1, char ch2, DeeTokens tk2, DeeTokens tokenElse) { if(lookAhead(1) == ch1) { return matchTokenFromStartPos(tk1, 2); } else if(lookAhead(1) == ch2) { return matchTokenFromStartPos(tk2, 2); } else { return matchTokenFromStartPos(tokenElse, 1); } } protected final Void rule2Choices(char ch1, DeeTokens tk1, DeeTokens tokenElse) { if(lookAhead(1) == ch1) { return matchTokenFromStartPos(tk1, 2); } else { return matchTokenFromStartPos(tokenElse, 1); } } }