// Scanner.java example
//
// Explorer
// GdxStudio-master
/*
 * 03/21/2010
 *
 * Copyright (C) 2010 Robert Futrell
 * robert_futrell at users.sourceforge.net
 * http://fifesoft.com/rsyntaxtextarea
 *
 * This library is distributed under a modified BSD license.  See the included
 * RSTALanguageSupport.License.txt file for details.
 */
package org.fife.rsta.ac.java.rjc.lexer;

import java.io.EOFException;
import java.io.IOException;
import java.io.Reader;
import java.util.List;
import java.util.Stack;
import javax.swing.text.BadLocationException;
import javax.swing.text.Document;
import javax.swing.text.Position;


/**
 * A scanner that allows the user to "push back" tokens.  This scanner
 * allows arbitrary lookahead.
 *
 * @author Robert Futrell
 * @version 1.0
 */
public class Scanner {

	private static final boolean DEBUG = false;

	/**
	 * The scanner we delegate to.
	 */
	private SourceCodeScanner s;

	/**
	 * Stack of tokens that have been "pushed back."
	 */
	private Stack stack;

	/**
	 * The depth in which we're in TypeArguments or TypeParameters.
	 */
	private int typeArgLevel;

	/**
	 * If we are parsing text in a Swing <code>JTextComponent</code>, this
	 * should be the document of that component.
	 */
	private Document doc;

	/**
	 * The most recently lexed token, or <code>null</code> if EOS was
	 * reached.
	 */
	private Token mostRecentToken;


	/**
	 * Constructor.  This scanner will return no tokens unless some are pushed
	 * onto it via {@link #yyPushback(Token)}.  It simply delegates to
	 * {@link #Scanner(Reader)}, passing a <code>null</code> reader.
	 */
	public Scanner() {
		this((Reader)null);
	}


	/**
	 * Constructor.  This scanner will only return those tokens pushed onto it.
	 *
	 * @param tokens Tokens to return.
	 */
	public Scanner(List tokens) {
		stack = new Stack();
		for (int i=tokens.size()-1; i>=0; i--) {
			stack.push(tokens.get(i));
		}
	}


	/**
	 * Constructor.
	 *
	 * @param r The stream to read from.  This may be <code>null</code>, in
	 *        which case no underlying scanner is created and this scanner
	 *        only returns tokens pushed onto it via
	 *        {@link #yyPushback(Token)}.
	 */
	public Scanner(Reader r) {
		stack = new Stack();
		// Only configure the delegate when there is one; the no-arg
		// constructor deliberately passes a null reader.
		if (r!=null) {
			s = new SourceCodeScanner(r);
			s.setKeepLastDocComment(true);
		}
	}


/**
 * Pushes a token onto the pushback stack, sanity-checking it first.  The
 * checks are just here for debugging purposes to make sure our parser is
 * sound.  A failed check is reported by throwing an
 * <code>InternalError</code> rather than terminating the JVM, so a parser
 * bug cannot take down the application hosting this library.
 *
 * @param t A token to push onto the stack (non-<code>null</code>).
 * @throws InternalError If <code>t</code> is <code>null</code>, or if it
 *         equals the token currently on top of the stack.
 */
private void pushOntoStack(Token t) {
	if (t==null) {
		throw new InternalError("ERROR: null token pushed onto stack");
	}
	if (!stack.isEmpty() && t.equals(stack.peek())) {
		throw new InternalError("ERROR: Token being duplicated: " + t);
	}
	stack.push(t);
}


	/**
	 * Leaves one level of TypeArguments/TypeParameters nesting.  The level is
	 * decremented before it is validated, so it is already negative when the
	 * error is thrown.
	 *
	 * @throws InternalError If the level drops below zero.
	 * @see #increaseTypeArgumentsLevel()
	 * @see #getTypeArgumentsLevel()
	 */
	public void decreaseTypeArgumentsLevel() {
		typeArgLevel -= 1;
		if (typeArgLevel<0) {
			throw new InternalError("typeArgLevel dipped below 0");
		}
	}


	/**
	 * Returns an offset into the source being parsed.  If a document has been
	 * set via {@link #setDocument(Document)}, the offset is backed by a
	 * <code>Position</code> created by that document; otherwise (or if the
	 * position cannot be created) a fixed offset is returned.
	 *
	 * @param offs The offset.
	 * @return An object representing the offset.
	 * @see #setDocument(Document)
	 */
	public Offset createOffset(final int offs) {
		if (doc!=null) {
			try {
				return new DocumentOffset(doc.createPosition(offs));
			} catch (BadLocationException ble) { // Should never happen
				// Best effort: report it and fall back to a fixed offset.
				ble.printStackTrace();
			}
		}
		return new FixedOffset(offs);
	}


	/**
	 * An offset whose value never changes.  This is a static nested class
	 * (rather than an anonymous inner class) so that offsets handed out to
	 * callers do not keep a reference to the scanner that created them.
	 */
	private static class FixedOffset implements Offset {

		private final int offs;

		FixedOffset(int offs) {
			this.offs = offs;
		}

		public int getOffset() {
			return offs;
		}

	}


	/**
	 * Prints a token to standard output when the <code>DEBUG</code> flag is
	 * enabled; does nothing otherwise.
	 *
	 * @param t The token to print.  This may be <code>null</code>.
	 */
	private void debugPrintToken(Token t) {
		if (!DEBUG) {
			return;
		}
		String text = t==null ? "... null" : "... " + t;
		System.out.println(text);
	}


	/**
	 * Returns the current column into the current line, as reported by the
	 * underlying scanner.  A scanner created without a reader has no
	 * underlying scanner, and this call fails with a
	 * <code>NullPointerException</code>.
	 *
	 * @return The current column.
	 * @see #getLine()
	 */
	public int getColumn() {
		final SourceCodeScanner delegate = s;
		return delegate.getColumn();
	}


	/**
	 * Returns the last documentation comment parsed, as reported by the
	 * underlying scanner.  The "last documentation comment" is documented as
	 * being cleared when this method returns; NOTE(review): that clearing
	 * happens inside <code>SourceCodeScanner</code> and is not visible from
	 * this class, so confirm it there.
	 *
	 * @return The last documentation comment parsed, or <code>null</code>
	 *         if there was none.
	 */
	public String getLastDocComment() {
		final SourceCodeScanner delegate = s;
		return delegate.getLastDocComment();
	}


	/**
	 * Returns the current line into the document, as reported by the
	 * underlying scanner.  A scanner created without a reader has no
	 * underlying scanner, and this call fails with a
	 * <code>NullPointerException</code>.
	 *
	 * @return The current line.
	 * @see #getColumn()
	 */
	public int getLine() {
		final SourceCodeScanner delegate = s;
		return delegate.getLine();
	}


	/**
	 * Returns the most recently-lexed token, i.e. the last
	 * non-<code>null</code> token produced by {@link #yylex()}.  Peeking
	 * methods go through <code>yylex()</code> too, so a peeked token also
	 * counts as "most recent".
	 *
	 * @return The token, or <code>null</code> if no token has been lexed yet.
	 */
	public Token getMostRecentToken() {
		return this.mostRecentToken;
	}


	/**
	 * Returns the current offset into the document, as reported by the
	 * underlying scanner.  A scanner created without a reader has no
	 * underlying scanner, and this call fails with a
	 * <code>NullPointerException</code>.
	 *
	 * @return The offset.
	 */
	public int getOffset() {
		final SourceCodeScanner delegate = s;
		return delegate.getOffset();
	}


	/**
	 * Eats through (possibly nested) paren pairs, e.g.:
	 * <pre>(int i=0; i&lt;getFoo(getParam()); i++)</pre>
	 * Blocks nested inside the paren pairs are also skipped; parens inside
	 * such a block do not affect the paren count.  If the stream ends before
	 * the matching ')' is found, this method simply returns.
	 *
	 * @throws IOException If an IO error occurs.
	 * @throws InternalError If the next token is not a '('.
	 */
	public void eatParenPairs() throws IOException {

		Token opening = yylex();
		if (opening==null || opening.getType()!=TokenTypes.SEPARATOR_LPAREN) {
			throw new InternalError("'(' expected, found: " + opening);
		}

		int openBraces = 0;
		int openParens = 1;

		for (Token tok=yylex(); tok!=null; tok=yylex()) {
			int type = tok.getType();
			if (type==TokenTypes.SEPARATOR_LBRACE) {
				openBraces++;
			}
			else if (type==TokenTypes.SEPARATOR_RBRACE) {
				// Unbalanced '}' never drives the depth negative
				if (openBraces>0) {
					openBraces--;
				}
			}
			else if (openBraces==0) {
				// Parens only count when we are not inside a block
				if (type==TokenTypes.SEPARATOR_LPAREN) {
					openParens++;
				}
				else if (type==TokenTypes.SEPARATOR_RPAREN) {
					openParens--;
					if (openParens==0) {
						return;
					}
				}
			}
		}

	}


	/**
	 * Consumes tokens until one of the specified type has been consumed, or
	 * until the end of the stream is reached.  This is useful, for example,
	 * to eat until the next semicolon.
	 *
	 * @param tokenType The type of token to eat through.
	 * @throws IOException If an IO error occurs.
	 */
	public void eatThroughNext(int tokenType) throws IOException {
		Token tok;
		do {
			tok = yylex();
		} while (tok!=null && tok.getType()!=tokenType);
	}


	/**
	 * Eats all tokens up to (and including) the next token of the specified
	 * type that is not nested inside a '{' ... '}' block.  Matching tokens
	 * found while the brace depth is positive are skipped.  Stops silently
	 * if the end of the stream is reached first.
	 *
	 * @param tokenType The type of token to eat through.
	 * @throws IOException If an IO error occurs.
	 * @see #eatThroughNextSkippingBlocks(int, int)
	 * @see #eatThroughNextSkippingBlocksAndStuffInParens(int, int)
	 */
	public void eatThroughNextSkippingBlocks(int tokenType) throws IOException {
		int braceDepth = 0;
		for (Token tok=yylex(); tok!=null; tok=yylex()) {
			final int type = tok.getType();
			if (type==TokenTypes.SEPARATOR_LBRACE) {
				braceDepth++;
				continue;
			}
			if (type==TokenTypes.SEPARATOR_RBRACE) {
				braceDepth--;
				continue;
			}
			if (type==tokenType && braceDepth<=0) {
				return;
			}
		}
	}


	/**
	 * Eats all tokens up to (and including) the next token of one of the
	 * specified types that is not nested inside a '{' ... '}' block.  This is
	 * useful, for example, to eat until the next equal sign or semicolon.
	 *
	 * @param tokenType1 The type of token to eat through.
	 * @param tokenType2 Another type of token to eat through.
	 * @return The last token read.  This will either be one of the two token
	 *         types passed in, or <code>null</code> if the end of the stream
	 *         is reached.
	 * @throws IOException If an IO error occurs.
	 * @see #eatThroughNextSkippingBlocksAndStuffInParens(int, int)
	 */
	public Token eatThroughNextSkippingBlocks(int tokenType1,
									int tokenType2) throws IOException {
		int braceDepth = 0;
		for (Token tok=yylex(); tok!=null; tok=yylex()) {
			final int type = tok.getType();
			if (type==TokenTypes.SEPARATOR_LBRACE) {
				braceDepth++;
				continue;
			}
			if (type==TokenTypes.SEPARATOR_RBRACE) {
				braceDepth--;
				continue;
			}
			boolean wanted = type==tokenType1 || type==tokenType2;
			if (wanted && braceDepth<=0) {
				return tok;
			}
		}
		return null;
	}


	/**
	 * Eats all tokens up to (and including) the next token of one of the
	 * specified types that is nested neither inside a '{' ... '}' block nor
	 * inside a '(' ... ')' pair.  This is useful, for example, to eat until
	 * the next equal sign or semicolon.  Brace and paren tokens themselves
	 * only adjust the nesting depth and are never returned.
	 *
	 * @param tokenType1 The type of token to eat through.
	 * @param tokenType2 Another type of token to eat through.
	 * @return The last token read.  This will either be one of the two token
	 *         types passed in, or <code>null</code> if the end of the stream
	 *         is reached.
	 * @throws IOException If an IO error occurs.
	 * @see #eatThroughNextSkippingBlocks(int, int)
	 */
	public Token eatThroughNextSkippingBlocksAndStuffInParens(int tokenType1,
									int tokenType2) throws IOException {

		int braceDepth = 0;
		int parenDepth = 0;

		for (Token tok=yylex(); tok!=null; tok=yylex()) {
			final int type = tok.getType();
			if (type==TokenTypes.SEPARATOR_LBRACE) {
				braceDepth++;
			}
			else if (type==TokenTypes.SEPARATOR_RBRACE) {
				braceDepth--;
			}
			else if (type==TokenTypes.SEPARATOR_LPAREN) {
				parenDepth++;
			}
			else if (type==TokenTypes.SEPARATOR_RPAREN) {
				parenDepth--;
			}
			else if (type==tokenType1 || type==tokenType2) {
				boolean topLevel = braceDepth<=0 && parenDepth<=0;
				if (topLevel) {
					return tok;
				}
			}
		}

		return null;

	}


	/**
	 * Eats all tokens up to, but not including, the next token of one of the
	 * specified types.  The matching token is pushed back onto the stream so
	 * it is the next token returned.  If the end of the stream is reached
	 * first, nothing is pushed back.
	 *
	 * @param type1 One type of token to stop at.
	 * @param type2 Another type of token to stop at.
	 * @throws IOException If an IO error occurs.
	 */
	public void eatUntilNext(int type1, int type2) throws IOException {
		for (Token tok=yylex(); tok!=null; tok=yylex()) {
			final int type = tok.getType();
			if (type==type1 || type==type2) {
				yyPushback(tok);
				return;
			}
		}
	}


	/**
	 * Eats all tokens up to, but not including, the next token of one of the
	 * specified types.  The matching token is pushed back onto the stream so
	 * it is the next token returned.  If the end of the stream is reached
	 * first, nothing is pushed back.
	 *
	 * @param type1 One type of token to stop at.
	 * @param type2 Another type of token to stop at.
	 * @param type3 A third type of token to stop at.
	 * @throws IOException If an IO error occurs.
	 */
	public void eatUntilNext(int type1, int type2, int type3) throws IOException {
		for (Token tok=yylex(); tok!=null; tok=yylex()) {
			final int type = tok.getType();
			if (type==type1 || type==type2 || type==type3) {
				yyPushback(tok);
				return;
			}
		}
	}


	/**
	 * Returns how deeply nested we currently are in TypeArguments or
	 * TypeParameters.
	 *
	 * @return The current level.
	 * @see #increaseTypeArgumentsLevel()
	 * @see #decreaseTypeArgumentsLevel()
	 */
	public int getTypeArgumentsLevel() {
		return this.typeArgLevel;
	}


	/**
	 * Enters one more level of TypeArguments/TypeParameters nesting.  While
	 * the level is positive, {@link #yylex()} splits multi-character
	 * operators that start with '&lt;' or '&gt;'.
	 *
	 * @see #decreaseTypeArgumentsLevel()
	 * @see #getTypeArgumentsLevel()
	 */
	public void increaseTypeArgumentsLevel() {
		typeArgLevel += 1;
	}


private Stack resetPositions;
private Stack currentResetTokenStack;
private int currentResetStartOffset;
	/**
	 * Marks the current position in the token stream so that it can later be
	 * returned to via {@link #resetToLastMarkedPosition()}, or discarded via
	 * {@link #clearResetPosition()}.  From this call on, every token returned
	 * by {@link #yylex()} is recorded.  Does nothing for scanners that have
	 * no underlying source scanner.
	 */
	public void markResetPosition() {
		if (s==null) { // Hack!  We should really do something for token-only scanners
			return;
		}
		if (resetPositions==null) {
			resetPositions = new Stack();
		}
		Stack recorded = new Stack();
		resetPositions.push(recorded);
		currentResetTokenStack = recorded;
		currentResetStartOffset = s.getOffset();
	}
	/**
	 * Rewinds the token stream to the position recorded by the most recent
	 * call to {@link #markResetPosition()}: every token lexed since then is
	 * pushed back so that it is returned again by {@link #yylex()}.  The mark
	 * is consumed by this call.  Does nothing for scanners that have no
	 * underlying source scanner.
	 *
	 * @throws InternalError If there is no marked position to reset to.
	 * @see #markResetPosition()
	 * @see #clearResetPosition()
	 */
	public void resetToLastMarkedPosition() {
		if (s!=null) { // Hack!  We should really do something for token-only scanners
			if (currentResetTokenStack==null) {
				throw new InternalError("No resetTokenStack!");
			}
			// Remove tokens off the standard stack within the "marked" range.
			// yylex() also records the end-of-stream null while a mark is
			// active, so an earlier reset may have left a null entry on the
			// stack.  Stop at such an entry instead of dereferencing it.
			while (!stack.isEmpty()) {
				Token t = (Token)stack.peek();
				if (t==null || t.getOffset()<currentResetStartOffset) {
					break;
				}
				stack.pop();
			}
			// Add all tokens in the "marked" range to our stack.  Popping
			// reverses the recorded order, so the first token lexed after the
			// mark ends up on top.
			while (!currentResetTokenStack.isEmpty()) {
				Token t = (Token)currentResetTokenStack.pop();
				stack.push(t);
			}
			resetPositions.pop(); // Remove currentResetTokenStack
			currentResetTokenStack = resetPositions.isEmpty() ? null : (Stack)resetPositions.peek();
			currentResetStartOffset = -1;
		}
	}
	/**
	 * Discards the position recorded by the most recent call to
	 * {@link #markResetPosition()} without rewinding the token stream.  Does
	 * nothing for scanners that have no underlying source scanner.
	 *
	 * @throws InternalError If there is no marked position to clear.
	 * @see #markResetPosition()
	 * @see #resetToLastMarkedPosition()
	 */
	public void clearResetPosition() {
		if (s==null) { // Hack!  We should really do something for token-only scanners
			return;
		}
		if (currentResetTokenStack==null) {
			throw new InternalError("No resetTokenStack!");
		}
		resetPositions.pop(); // Remove currentResetTokenStack
		if (resetPositions.isEmpty()) {
			currentResetTokenStack = null;
		}
		else {
			currentResetTokenStack = (Stack)resetPositions.peek();
		}
		currentResetStartOffset = -1;
	}

	/**
	 * Sets the Swing <code>Document</code> whose content is being parsed.
	 * This method should be called if we are parsing code inside a
	 * <code>JTextComponent</code>, as it will help our parsed code to track
	 * changes when the document is modified.  If we are parsing source from a
	 * flat file, this method shouldn't be called.  The document is used by
	 * {@link #createOffset(int)} to create the positions backing the offsets
	 * it returns.
	 *
	 * @param doc The document being parsed.
	 * @see #createOffset(int)
	 */
	public void setDocument(Document doc) {
		this.doc = doc;
	}


	/**
	 * Skips all consecutive bracket pairs ('[' immediately followed by ']')
	 * at the front of the stream.  A '[' that is not directly followed by a
	 * ']' is left on the stream.
	 *
	 * @return The number of bracket pairs skipped.
	 * @throws IOException If an IO error occurs.
	 */
	public int skipBracketPairs() throws IOException {

		int pairs = 0;

		for (;;) {
			if (yyPeekCheckType()!=TokenTypes.SEPARATOR_LBRACKET) {
				break;
			}
			if (yyPeekCheckType(2)!=TokenTypes.SEPARATOR_RBRACKET) {
				break;
			}
			// Consume the '[' and the ']'
			yylex();
			yylex();
			pairs++;
		}

		return pairs;

	}


	/**
	 * Returns the next token from the input stream.  Pushed-back tokens are
	 * returned before anything is read from the underlying scanner.  While
	 * the TypeArguments level is positive, a multi-character operator that
	 * starts with '&lt;' or '&gt;' is split: its first character is returned
	 * and the remainder is pushed back for the next call.
	 *
	 * @return The next token, or <code>null</code> if the end of the stream
	 *         has been reached.
	 * @throws IOException If an IO error occurs.
	 */
	/*
	 * NOTE: All other lex'ing methods should call into this one.
	 */
	public Token yylex() throws IOException {

		Token tok;
		if (!stack.isEmpty()) {
			tok = (Token)stack.pop();
		}
		else if (s!=null) {
			tok = s.yylex();
		}
		else {
			tok = null;
		}

		// If we have nested TypeArguments ("Set<Map.Entry<String,String>>"),
		// prevent the ">>" from coming across as a single token.
		if (typeArgLevel>0 && tok!=null && tok.isOperator()) {
			String lexeme = tok.getLexeme();
			if (lexeme.length()>1) {
				char first = lexeme.charAt(0);
				if (first=='<' || first=='>') {
					boolean lessThan = first=='<';
					// The remainder goes back onto the stack as-is (even if
					// it is null for an unrecognized operator type).
					stack.push(remainderAfterLeadingAngle(tok, lessThan));
					tok = new TokenImpl(
							lessThan ? Token.OPERATOR_LT : Token.OPERATOR_GT,
							lessThan ? "<" : ">",
							tok.getLine(), tok.getColumn(), tok.getOffset());
				}
			}
		}

		debugPrintToken(tok);
		// Record the token (or the end-of-stream null) for a pending reset
		if (currentResetTokenStack!=null) {
			currentResetTokenStack.push(tok);
		}
		if (tok!=null) { // Don't let EOS corrupt most recent token
			mostRecentToken = tok;
		}
		return tok;

	}


	/**
	 * Creates the token for everything after the first character of a
	 * multi-character operator that starts with '&lt;' or '&gt;'.  The new
	 * token starts one column and one offset after the original.
	 *
	 * @param op The operator token being split.
	 * @param lessThan Whether the operator starts with '&lt;' (as opposed to
	 *        '&gt;').
	 * @return The remainder token, or <code>null</code> if the operator's
	 *         type is not one of those handled for its leading character.
	 */
	private Token remainderAfterLeadingAngle(Token op, boolean lessThan) {

		int line = op.getLine();
		int column = op.getColumn() + 1;
		int offset = op.getOffset() + 1;

		if (lessThan) {
			switch (op.getType()) {
				case TokenTypes.OPERATOR_LTE:
					return new TokenImpl(Token.OPERATOR_EQUALS, "=",
							line, column, offset);
				case TokenTypes.OPERATOR_LSHIFT:
					return new TokenImpl(Token.OPERATOR_LT, "<",
							line, column, offset);
				case TokenTypes.OPERATOR_LSHIFT_EQUALS:
					return new TokenImpl(Token.OPERATOR_LTE, "<=",
							line, column, offset);
				default:
					return null;
			}
		}

		switch (op.getType()) {
			case TokenTypes.OPERATOR_GTE:
				return new TokenImpl(Token.OPERATOR_EQUALS, "=",
						line, column, offset);
			case TokenTypes.OPERATOR_RSHIFT:
				return new TokenImpl(Token.OPERATOR_GT, ">",
						line, column, offset);
			case TokenTypes.OPERATOR_RSHIFT2:
				return new TokenImpl(Token.OPERATOR_RSHIFT, ">>",
						line, column, offset);
			case TokenTypes.OPERATOR_RSHIFT_EQUALS:
				return new TokenImpl(Token.OPERATOR_GTE, ">=",
						line, column, offset);
			case TokenTypes.OPERATOR_RSHIFT2_EQUALS:
				return new TokenImpl(Token.OPERATOR_RSHIFT_EQUALS, ">>=",
						line, column, offset);
			default:
				return null;
		}

	}


	/**
	 * Returns the next token from the input stream, or throws an exception
	 * if the end of stream is reached.
	 *
	 * @param error The error description for the exception if the end of
	 *        stream is reached.
	 * @return The token.
	 * @throws IOException If an IO error occurs or the end of stream is
	 *         reached (an <code>EOFException</code> in the latter case).
	 */
	public Token yylexNonNull(String error) throws IOException {
		final Token next = yylex();
		if (next!=null) {
			return next;
		}
		throw new EOFException(error);
	}


	/**
	 * Returns the next token from the input stream, or throws an exception
	 * if the end of stream is reached or if the token is not of a given
	 * type.
	 *
	 * @param type The type the token must be.
	 * @param error The error description for the exception if the end of
	 *        stream is reached, or if the token is of an unexpected type.
	 * @return The token.
	 * @throws IOException If an IO error occurs or the end of stream is
	 *         reached, or if the token is of the wrong type.
	 */
	public Token yylexNonNull(int type, String error) throws IOException {
		final int noSecondType = -1; // Only check against "type"
		return yylexNonNull(type, noSecondType, error);
	}


	/**
	 * Returns the next token from the input stream, or throws an exception
	 * if the end of stream is reached or if the token is not of two given
	 * types.
	 *
	 * @param type1 One type the token can be.
	 * @param type2 Another type the token can be, or <tt>-1</tt> if we
	 *        should only check against <tt>type1</tt>.
	 * @param error The error description for the exception if the end of
	 *        stream is reached, or if the token is of an unexpected type.
	 * @return The token.
	 * @throws IOException If an IO error occurs or the end of stream is
	 *         reached, or if the token is of a wrong type.
	 */
	public Token yylexNonNull(int type1, int type2, String error)
								throws IOException {
		final int noThirdType = -1; // Only check against type1 and type2
		return yylexNonNull(type1, type2, noThirdType, error);
	}


	/**
	 * Returns the next token from the input stream, or throws an exception
	 * if the end of stream is reached or if the token is not of three given
	 * types.
	 *
	 * @param type1 One type the token can be.
	 * @param type2 Another type the token can be, or <tt>-1</tt> if we
	 *        should only check against <tt>type1</tt>.
	 * @param type3 Another type the token can be, or <tt>-1</tt> if we
	 *        should only check against <tt>type1</tt> and <tt>type2</tt>.
	 * @param error The error description for the exception if the end of
	 *        stream is reached, or if the token is of an unexpected type.
	 * @return The token.
	 * @throws IOException If an IO error occurs or the end of stream is
	 *         reached, or if the token is of a wrong type.
	 */
	public Token yylexNonNull(int type1, int type2, int type3, String error)
								throws IOException {
		final Token next = yylex();
		if (next==null) {
			throw new IOException(error);
		}
		final int actual = next.getType();
		// A type of -1 means "unused" and never matches
		boolean accepted = actual==type1 ||
				(type2!=-1 && actual==type2) ||
				(type3!=-1 && actual==type3);
		if (!accepted) {
			throw new IOException(error + ", found '" + next.getLexeme() + "'");
		}
		return next;
	}


	/**
	 * Returns the next token, but does not take it off of the stream.  This
	 * is useful for lookahead.  The token is lexed via {@link #yylex()} and
	 * then pushed back.
	 *
	 * @return The next token, or <code>null</code> if the end of the stream
	 *         has been reached.
	 * @throws IOException If an IO error occurs.
	 */
	public Token yyPeek() throws IOException {
		final Token next = yylex();
		if (next==null) {
			return null;
		}
		pushOntoStack(next);
		return next;
	}


	/**
	 * Returns the <tt>depth</tt>-th token, but does not take anything off of
	 * the stream.  This is useful for lookahead.  All tokens read while
	 * looking ahead are pushed back before this method returns.
	 *
	 * @param depth The token to peek at, from <tt>1</tt> forward.
	 * @return The token, or <code>null</code> if that token index is past the
	 *         end of the stream.
	 * @throws IOException If an IO error occurs.
	 * @throws IllegalArgumentException If <code>depth</code> is less than
	 *         <tt>1</tt>.
	 */
	public Token yyPeek(int depth) throws IOException {
		if (depth<1) {
			throw new IllegalArgumentException("depth must be >= 1");
		}
		Stack consumed = new Stack();
		Token result = null;
		for (int i=0; i<depth; i++) {
			Token next = yylex();
			if (next==null) {
				// Ran off the end of the stream before reaching "depth"
				result = null;
				break;
			}
			consumed.push(next);
			result = next;
		}
		// Restore the stream; the last token read is pushed back first
		while (!consumed.isEmpty()) {
			yyPushback((Token)consumed.pop());
		}
		return result;
	}


	/**
	 * Peeks at the next token on the stream and returns its type, leaving
	 * the token on the stream.
	 *
	 * @return The type of the next token, or <tt>-1</tt> if the end of stream
	 *         has been reached.
	 * @throws IOException If an IO error occurs.
	 */
	public int yyPeekCheckType() throws IOException {
		final Token next = yyPeek();
		if (next==null) {
			return -1;
		}
		return next.getType();
	}


	/**
	 * Peeks at the specified token on the stream and returns its type,
	 * leaving the stream unchanged.
	 *
	 * @param index The index of the token to retrieve, from <tt>1</tt>
	 *        forward.
	 * @return The type of the token, or <tt>-1</tt> if the end of stream
	 *         was reached first.
	 * @throws IOException If an IO error occurs.
	 */
	public int yyPeekCheckType(int index) throws IOException {
		final Token peeked = yyPeek(index);
		if (peeked==null) {
			return -1;
		}
		return peeked.getType();
	}


	/**
	 * Returns the next token, but does not take it off of the stream, and
	 * throws an exception if the end of stream has been reached.  This is
	 * useful for lookahead.
	 *
	 * @param error The error description for the exception if the end of
	 *        stream is reached.
	 * @return The next token.
	 * @throws IOException If an IO error occurs, or if EOS is reached.
	 */
	public Token yyPeekNonNull(String error) throws IOException {
		final Token next = yyPeek();
		if (next!=null) {
			return next;
		}
		throw new IOException(error);
	}


	/**
	 * Returns the next token, but does not take it off of the stream.  This
	 * is useful for lookahead.
	 *
	 * @param type The type the token must be.
	 * @param error The error description for the exception if the end of
	 *        stream is reached, or if the token is of an unexpected type.
	 * @return The next token.
	 * @throws IOException If an IO error occurs, or if EOS is reached, or
	 *         if the token is not of the specified type.
	 */
	public Token yyPeekNonNull(int type, String error) throws IOException {
		final int noSecondType = -1; // Only check against "type"
		return yyPeekNonNull(type, noSecondType, error);
	}


	/**
	 * Returns the next token, but does not take it off of the stream.  This
	 * is useful for lookahead.
	 *
	 * @param type1 One of the two types the token must be.
	 * @param type2 The other of the two types the token must be, or
	 *        <tt>-1</tt> to only check against <tt>type1</tt>.
	 * @param error The error description for the exception if the end of
	 *        stream is reached, or if the token is of an unexpected type.
	 * @return The next token.
	 * @throws IOException If an IO error occurs, or if EOS is reached, or
	 *         if the token is not of the specified type.
	 */
	public Token yyPeekNonNull(int type1, int type2, String error)
												throws IOException {
		final int noThirdType = -1; // Only check against type1 and type2
		return yyPeekNonNull(type1, type2, noThirdType, error);
	}


	/**
	 * Returns the next token, but does not take it off of the stream.  This
	 * is useful for lookahead.
	 *
	 * @param type1 One of the three types the token must be.
	 * @param type2 Another of the three types the token must be, or
	 *        <tt>-1</tt> to ignore this type.
	 * @param type3 The third of the types the token must be, or
	 *        <tt>-1</tt> to ignore this type.
	 * @param error The error description for the exception if the end of
	 *        stream is reached, or if the token is of an unexpected type.
	 * @return The next token.
	 * @throws IOException If an IO error occurs, or if EOS is reached, or
	 *         if the token is not of the specified type.
	 */
	public Token yyPeekNonNull(int type1, int type2, int type3, String error)
												throws IOException {
		final Token next = yyPeek();
		if (next==null) {
			throw new IOException(error);
		}
		final int actual = next.getType();
		// A type of -1 means "unused" and never matches
		boolean accepted = actual==type1 ||
				(type2!=-1 && actual==type2) ||
				(type3!=-1 && actual==type3);
		if (!accepted) {
			throw new IOException(error + ", found '" + next.getLexeme() + "'");
		}
		return next;
	}


	/**
	 * Pushes a token back onto the stream, so that it is the next token
	 * returned by {@link #yylex()}.
	 *
	 * @param t The token.  If this is <code>null</code>, nothing is pushed.
	 */
	public void yyPushback(Token t) {
		if (t==null) {
			return;
		}
		pushOntoStack(t);
	}


	private class DocumentOffset implements Offset {

		public Position pos;

		public DocumentOffset(Position pos) {
			this.pos = pos;
		}

		public int getOffset() {
			return pos.getOffset();
		}

	}


}