/*
* 03/21/2010
*
* Copyright (C) 2010 Robert Futrell
* robert_futrell at users.sourceforge.net
* http://fifesoft.com/rsyntaxtextarea
*
* This library is distributed under a modified BSD license. See the included
* RSTALanguageSupport.License.txt file for details.
*/
package org.fife.rsta.ac.java.rjc.lexer;
import java.io.EOFException;
import java.io.IOException;
import java.io.Reader;
import java.util.List;
import java.util.Stack;
import javax.swing.text.BadLocationException;
import javax.swing.text.Document;
import javax.swing.text.Position;
/**
* A scanner that allows the user to "push back" tokens. This scanner
* allows arbitrary lookahead.
*
* @author Robert Futrell
* @version 1.0
*/
public class Scanner {

    /**
     * Whether every token returned by {@link #yylex()} is echoed to stdout.
     */
    private static final boolean DEBUG = false;

    /**
     * The scanner we delegate to.  This is <code>null</code> for scanners
     * that only return tokens pushed onto them.
     */
    private SourceCodeScanner s;

    /**
     * Stack of tokens that have been "pushed back."  The top of the stack is
     * the next token returned by {@link #yylex()}.
     */
    private Stack stack;

    /**
     * The depth in which we're in TypeArguments or TypeParameters.
     */
    private int typeArgLevel;

    /**
     * If we are parsing text in a Swing <code>JTextComponent</code>, this
     * should be the document of that component.
     */
    private Document doc;

    /**
     * The most recently lexed token.  End-of-stream does not overwrite this
     * value, so it is only <code>null</code> before the first token is lexed.
     */
    private Token mostRecentToken;

    /**
     * One token stack per active mark, innermost mark on top.  Lazily
     * created by {@link #markResetPosition()}.
     */
    private Stack resetPositions;

    /**
     * Tokens lexed since the innermost active mark, or <code>null</code> if
     * no mark is active.
     */
    private Stack currentResetTokenStack;

    /**
     * Offset of the delegate scanner when the innermost mark was set.
     */
    private int currentResetStartOffset;


    /**
     * Constructor.  This scanner will return no tokens unless some are pushed
     * onto it via {@link #yyPushback(Token)}.
     */
    public Scanner() {
        this((Reader)null);
    }


    /**
     * Constructor.  This scanner will only return those tokens pushed onto it.
     *
     * @param tokens Tokens to return, in the order they should be returned.
     */
    public Scanner(List tokens) {
        stack = new Stack();
        // Push in reverse so the first list element ends up on top.
        for (int i=tokens.size()-1; i>=0; i--) {
            stack.push(tokens.get(i));
        }
    }


    /**
     * Constructor.
     *
     * @param r The stream to read from.  If this is <code>null</code>, the
     *        scanner only returns tokens pushed onto it.
     */
    public Scanner(Reader r) {
        // Only configure the delegate when there is one; the no-arg
        // constructor legitimately passes a null reader.
        if (r!=null) {
            s = new SourceCodeScanner(r);
            s.setKeepLastDocComment(true);
        }
        stack = new Stack();
    }


    /**
     * Pushes a token onto the push-back stack, first sanity checking that
     * our parser is sound: the token must be non-<code>null</code> and must
     * not be equal to the token currently on top of the stack.
     *
     * @param t A token to push onto the stack (non-<code>null</code>).
     * @throws InternalError If <code>t</code> is <code>null</code> or
     *         duplicates the token on top of the stack.
     */
    private void pushOntoStack(Token t) {
        // Fail loudly, but never terminate the JVM of the hosting
        // application from inside library code.
        if (t==null) {
            throw new InternalError("null token pushed onto stack");
        }
        if (!stack.isEmpty() && t.equals(stack.peek())) {
            throw new InternalError("Token being duplicated: " + t);
        }
        stack.push(t);
    }


    /**
     * Decreases the depth in which we're in TypeArguments or TypeParameters.
     *
     * @throws InternalError If the depth drops below zero.
     * @see #increaseTypeArgumentsLevel()
     * @see #getTypeArgumentsLevel()
     */
    public void decreaseTypeArgumentsLevel() {
        if (--typeArgLevel<0) {
            throw new InternalError("typeArgLevel dipped below 0");
        }
    }


    /**
     * Returns an offset into the source being parsed.  This offset will be
     * tracked if we are parsing code from a Swing <code>JTextComponent</code>.
     *
     * @param offs The offset.
     * @return An object representing the offset.
     * @see #setDocument(Document)
     */
    public Offset createOffset(final int offs) {
        if (doc!=null) {
            try {
                return new DocumentOffset(doc.createPosition(offs));
            } catch (BadLocationException ble) { // Should never happen
                ble.printStackTrace();
                // Fall through to the fixed, untracked offset below.
            }
        }
        return new Offset() {
            public int getOffset() {
                return offs;
            }
        };
    }


    /**
     * Prints a token to stdout if {@link #DEBUG} is enabled.
     *
     * @param t The token to print; may be <code>null</code>.
     */
    private void debugPrintToken(Token t) {
        if (DEBUG) {
            // String concatenation renders a null token as "null".
            System.out.println("... " + t);
        }
    }


    /**
     * Returns the current column into the current line.  This delegates to
     * the underlying source scanner, so it is only valid for scanners that
     * were created from a <code>Reader</code>.
     *
     * @return The current column.
     * @see #getLine()
     */
    public int getColumn() {
        return s.getColumn();
    }


    /**
     * Returns the last documentation comment parsed, as reported by the
     * underlying source scanner.
     *
     * @return The last documentation comment parsed, or <code>null</code>
     *         if there was none (which is always the case for scanners not
     *         created from a <code>Reader</code>).
     */
    public String getLastDocComment() {
        return s!=null ? s.getLastDocComment() : null;
    }


    /**
     * Returns the current line into the document.  This delegates to the
     * underlying source scanner, so it is only valid for scanners that were
     * created from a <code>Reader</code>.
     *
     * @return The current line.
     * @see #getColumn()
     */
    public int getLine() {
        return s.getLine();
    }


    /**
     * Returns the most recently-lexed token.
     *
     * @return The token, or <code>null</code> if no token has been lexed
     *         yet.
     */
    public Token getMostRecentToken() {
        return mostRecentToken;
    }


    /**
     * Returns the current offset into the document.  This delegates to the
     * underlying source scanner, so it is only valid for scanners that were
     * created from a <code>Reader</code>.
     *
     * @return The offset.
     */
    public int getOffset() {
        return s.getOffset();
    }


    /**
     * Eats through (possibly nested) paren pairs, e.g.:
     * <pre>(int i=0; i&lt;getFoo(getParam()); i++)</pre>.
     * Blocks nested inside the paren pairs are also skipped.  If the end of
     * the stream is reached before the closing paren, this method simply
     * returns.
     *
     * @throws IOException If an IO error occurs.
     * @throws InternalError If the next token is not a '('.
     */
    public void eatParenPairs() throws IOException {

        Token t = yylex();
        if (t==null || t.getType()!=TokenTypes.SEPARATOR_LPAREN) {
            throw new InternalError("'(' expected, found: " + t);
        }

        int blockDepth = 0;
        int parenDepth = 1;

        while ((t=yylex())!=null) {
            switch (t.getType()) {
                case TokenTypes.SEPARATOR_LBRACE:
                    blockDepth++;
                    break;
                case TokenTypes.SEPARATOR_RBRACE:
                    // Tolerate stray closing braces.
                    blockDepth = Math.max(blockDepth-1, 0);
                    break;
                case TokenTypes.SEPARATOR_LPAREN:
                    // Parens inside a nested block are not counted.
                    if (blockDepth==0) {
                        parenDepth++;
                    }
                    break;
                case TokenTypes.SEPARATOR_RPAREN:
                    if (blockDepth==0 && --parenDepth==0) {
                        return;
                    }
                    break;
                default:
                    break;
            }
        }

    }


    /**
     * Eats all tokens up to (and including) the next token of the specified
     * type.  This is useful, for example, to eat until the next semicolon.
     *
     * @param tokenType The type of token to eat through.
     * @throws IOException If an IO error occurs.
     */
    public void eatThroughNext(int tokenType) throws IOException {
        Token t = yylex();
        while (t!=null && t.getType()!=tokenType) {
            t = yylex();
        }
    }


    /**
     * Eats all tokens up to (and including) the next token of the specified
     * type that is not nested inside a '{' ... '}' block.  This is useful,
     * for example, to eat until the next semicolon.
     *
     * @param tokenType The type of token to eat through.
     * @throws IOException If an IO error occurs.
     * @see #eatThroughNextSkippingBlocks(int, int)
     * @see #eatThroughNextSkippingBlocksAndStuffInParens(int, int)
     */
    public void eatThroughNextSkippingBlocks(int tokenType) throws IOException {
        // Same scan as the two-type overload with both types equal.
        eatThroughNextSkippingBlocks(tokenType, tokenType);
    }


    /**
     * Eats all tokens up to (and including) the next token of one of the
     * specified types that is not nested inside a '{' ... '}' block.  This
     * is useful, for example, to eat until the next equal sign or semicolon.
     *
     * @param tokenType1 The type of token to eat through.
     * @param tokenType2 Another type of token to eat through.
     * @return The last token read.  This will either be one of the two token
     *         types passed in, or <code>null</code> if the end of the stream
     *         is reached.
     * @throws IOException If an IO error occurs.
     * @see #eatThroughNextSkippingBlocksAndStuffInParens(int, int)
     */
    public Token eatThroughNextSkippingBlocks(int tokenType1,
                                    int tokenType2) throws IOException {
        int blockDepth = 0;
        Token t;
        while ((t=yylex())!=null) {
            int type = t.getType();
            if (type==TokenTypes.SEPARATOR_LBRACE) {
                blockDepth++;
            }
            else if (type==TokenTypes.SEPARATOR_RBRACE) {
                blockDepth--;
            }
            else if ((type==tokenType1 || type==tokenType2) &&
                    blockDepth<=0) { // <=0 tolerates stray closing braces
                return t;
            }
        }
        return null;
    }


    /**
     * Eats all tokens up to (and including) the next token of one of the
     * specified types that is nested neither inside a '{' ... '}' block nor
     * inside parens.  This is useful, for example, to eat until the next
     * equal sign or semicolon.
     *
     * @param tokenType1 The type of token to eat through.
     * @param tokenType2 Another type of token to eat through.
     * @return The last token read.  This will either be one of the two token
     *         types passed in, or <code>null</code> if the end of the stream
     *         is reached.
     * @throws IOException If an IO error occurs.
     * @see #eatThroughNextSkippingBlocks(int, int)
     */
    public Token eatThroughNextSkippingBlocksAndStuffInParens(int tokenType1,
                                    int tokenType2) throws IOException {
        int blockDepth = 0;
        int parenDepth = 0;
        Token t;
        while ((t=yylex())!=null) {
            int type = t.getType();
            switch (type) {
                case TokenTypes.SEPARATOR_LBRACE:
                    blockDepth++;
                    break;
                case TokenTypes.SEPARATOR_RBRACE:
                    blockDepth--;
                    break;
                case TokenTypes.SEPARATOR_LPAREN:
                    parenDepth++;
                    break;
                case TokenTypes.SEPARATOR_RPAREN:
                    parenDepth--;
                    break;
                default:
                    boolean wanted = type==tokenType1 || type==tokenType2;
                    if (wanted && blockDepth<=0 && parenDepth<=0) {
                        return t;
                    }
                    break;
            }
        }
        return null;
    }


    /**
     * Eats all tokens up to (but not including) the next token of one of
     * the specified types.  The matching token is pushed back, so it is the
     * next token returned by {@link #yylex()}.
     *
     * @param type1 One type of token to stop at.
     * @param type2 Another type of token to stop at.
     * @throws IOException If an IO error occurs.
     */
    public void eatUntilNext(int type1, int type2) throws IOException {
        // Same scan as the three-type overload with a repeated type.
        eatUntilNext(type1, type2, type2);
    }


    /**
     * Eats all tokens up to (but not including) the next token of one of
     * the specified types.  The matching token is pushed back, so it is the
     * next token returned by {@link #yylex()}.
     *
     * @param type1 One type of token to stop at.
     * @param type2 Another type of token to stop at.
     * @param type3 A third type of token to stop at.
     * @throws IOException If an IO error occurs.
     */
    public void eatUntilNext(int type1, int type2, int type3) throws IOException {
        Token t;
        while ((t=yylex())!=null) {
            int type = t.getType();
            if (type==type1 || type==type2 || type==type3) {
                yyPushback(t);
                return;
            }
        }
    }


    /**
     * Returns the current TypeArgument/TypeParameter level.
     *
     * @return The current level.
     * @see #increaseTypeArgumentsLevel()
     * @see #decreaseTypeArgumentsLevel()
     */
    public int getTypeArgumentsLevel() {
        return typeArgLevel;
    }


    /**
     * Increases the depth in which we're in TypeArguments or TypeParameters.
     *
     * @see #decreaseTypeArgumentsLevel()
     * @see #getTypeArgumentsLevel()
     */
    public void increaseTypeArgumentsLevel() {
        typeArgLevel++;
    }


    /**
     * Marks the current position in the token stream.  Tokens lexed after
     * this call are remembered so that
     * {@link #resetToLastMarkedPosition()} can make them available again.
     * Marks are kept on a stack, so calls may be nested.  This method does
     * nothing for scanners not created from a <code>Reader</code>.
     *
     * @see #resetToLastMarkedPosition()
     * @see #clearResetPosition()
     */
    public void markResetPosition() {
        if (s!=null) { // Hack!  We should really do something for token-only scanners
            if (resetPositions==null) {
                resetPositions = new Stack();
            }
            currentResetTokenStack = new Stack();
            resetPositions.push(currentResetTokenStack);
            currentResetStartOffset = s.getOffset();
        }
    }


    /**
     * Rewinds the token stream to the most recently marked position and
     * discards that mark.  This method does nothing for scanners not created
     * from a <code>Reader</code>.
     *
     * @throws InternalError If there is no active mark.
     * @see #markResetPosition()
     * @see #clearResetPosition()
     */
    public void resetToLastMarkedPosition() {
        if (s!=null) { // Hack!  We should really do something for token-only scanners

            if (currentResetTokenStack==null) {
                throw new InternalError("No resetTokenStack!");
            }

            // Remove tokens off the standard stack within the "marked" range
            while (!stack.isEmpty()) {
                Token t = (Token)stack.peek();
                if (t.getOffset()<currentResetStartOffset) {
                    break;
                }
                stack.pop();
            }

            // Add all tokens in the "marked" range to our stack.  Popping
            // reverses them, so the first token lexed after the mark ends
            // up on top.
            while (!currentResetTokenStack.isEmpty()) {
                stack.push(currentResetTokenStack.pop());
            }

            resetPositions.pop(); // Remove currentResetTokenStack
            currentResetTokenStack = resetPositions.isEmpty() ? null :
                                        (Stack)resetPositions.peek();
            currentResetStartOffset = -1;

        }
    }


    /**
     * Discards the most recently marked position without rewinding the
     * token stream.  This method does nothing for scanners not created from
     * a <code>Reader</code>.
     *
     * @throws InternalError If there is no active mark.
     * @see #markResetPosition()
     * @see #resetToLastMarkedPosition()
     */
    public void clearResetPosition() {
        if (s!=null) { // Hack!  We should really do something for token-only scanners
            if (currentResetTokenStack==null) {
                throw new InternalError("No resetTokenStack!");
            }
            resetPositions.pop(); // Remove currentResetTokenStack
            currentResetTokenStack = resetPositions.isEmpty() ? null :
                                        (Stack)resetPositions.peek();
            currentResetStartOffset = -1;
        }
    }


    /**
     * Sets the Swing <code>Document</code> whose content is being parsed.
     * This method should be called if we are parsing code inside a
     * <code>JTextComponent</code>, as it will help our parsed code to track
     * changes when the document is modified.  If we are parsing source from a
     * flat file, this method shouldn't be called.
     *
     * @param doc The document being parsed.
     */
    public void setDocument(Document doc) {
        this.doc = doc;
    }


    /**
     * Skips all bracket pairs ('[' followed by ']') in the stream.
     *
     * @return The number of bracket pairs skipped.
     * @throws IOException If an IO error occurs.
     */
    public int skipBracketPairs() throws IOException {
        int count = 0;
        while (yyPeekCheckType()==TokenTypes.SEPARATOR_LBRACKET &&
                yyPeekCheckType(2)==TokenTypes.SEPARATOR_RBRACKET) {
            yylex(); // '['
            yylex(); // ']'
            count++;
        }
        return count;
    }


    /**
     * Returns the next token from the input stream.  Pushed-back tokens are
     * returned before anything is read from the underlying scanner.
     *
     * @return The next token, or <code>null</code> if the end of the stream
     *         has been reached.
     * @throws IOException If an IO error occurs.
     */
    /*
     * NOTE: All other lex'ing methods should call into this one.
     */
    public Token yylex() throws IOException {

        Token t;
        if (stack.isEmpty()) {
            t = s!=null ? s.yylex() : null;
        }
        else {
            t = (Token)stack.pop();
        }

        // If we have nested TypeArguments ("Set<Map.Entry<String,String>>"),
        // Prevent the ">>" from coming across as a single token.
        if (typeArgLevel>0 && t!=null && t.isOperator()) {
            t = splitAngleOperator(t);
        }

        debugPrintToken(t);

        if (t!=null) { // Don't let EOS corrupt most recent token
            // EOS is not recorded for replay either: a null on the
            // push-back stack would be dereferenced by a later reset.
            if (currentResetTokenStack!=null) {
                currentResetTokenStack.push(t);
            }
            mostRecentToken = t;
        }

        return t;

    }


    /**
     * Splits a multi-character operator starting with '&lt;' or '&gt;' into
     * its leading angle bracket and the remainder.  The remainder is pushed
     * back onto the stack so it is the next token lexed.
     *
     * @param t The operator token to examine (non-<code>null</code>).
     * @return The leading angle bracket token if <code>t</code> was split,
     *         or <code>t</code> itself if it is not an operator this method
     *         knows how to split.
     */
    private Token splitAngleOperator(Token t) {

        String lexeme = t.getLexeme();
        if (lexeme.length()<2) {
            return t;
        }

        Token rest = null;
        int firstType;
        String firstLexeme;

        char ch = lexeme.charAt(0);
        if (ch=='<') {
            firstType = Token.OPERATOR_LT;
            firstLexeme = "<";
            switch (t.getType()) {
                case TokenTypes.OPERATOR_LTE:
                    rest = createRemainder(t, Token.OPERATOR_EQUALS, "=");
                    break;
                case TokenTypes.OPERATOR_LSHIFT:
                    rest = createRemainder(t, Token.OPERATOR_LT, "<");
                    break;
                case TokenTypes.OPERATOR_LSHIFT_EQUALS:
                    rest = createRemainder(t, Token.OPERATOR_LTE, "<=");
                    break;
                default:
                    break;
            }
        }
        else if (ch=='>') {
            firstType = Token.OPERATOR_GT;
            firstLexeme = ">";
            switch (t.getType()) {
                case TokenTypes.OPERATOR_GTE:
                    rest = createRemainder(t, Token.OPERATOR_EQUALS, "=");
                    break;
                case TokenTypes.OPERATOR_RSHIFT:
                    rest = createRemainder(t, Token.OPERATOR_GT, ">");
                    break;
                case TokenTypes.OPERATOR_RSHIFT2:
                    rest = createRemainder(t, Token.OPERATOR_RSHIFT, ">>");
                    break;
                case TokenTypes.OPERATOR_RSHIFT_EQUALS:
                    rest = createRemainder(t, Token.OPERATOR_GTE, ">=");
                    break;
                case TokenTypes.OPERATOR_RSHIFT2_EQUALS:
                    rest = createRemainder(t, Token.OPERATOR_RSHIFT_EQUALS, ">>=");
                    break;
                default:
                    break;
            }
        }
        else {
            return t;
        }

        // Only split operators we recognized.  Pushing a null remainder
        // would later be popped and mistaken for the end of the stream.
        if (rest==null) {
            return t;
        }

        stack.push(rest);
        return new TokenImpl(firstType, firstLexeme,
                t.getLine(), t.getColumn(), t.getOffset());

    }


    /**
     * Creates the token for everything after the first character of a split
     * operator.
     *
     * @param t The operator being split.
     * @param type The token type of the remainder.
     * @param lexeme The text of the remainder.
     * @return The remainder token, positioned one character after
     *         <code>t</code>.
     */
    private Token createRemainder(Token t, int type, String lexeme) {
        return new TokenImpl(type, lexeme,
                t.getLine(), t.getColumn()+1, t.getOffset()+1);
    }


    /**
     * Returns the next token from the input stream, or throws an exception
     * if the end of stream is reached.
     *
     * @param error The error description for the exception if the end of
     *        stream is reached.
     * @return The token.
     * @throws IOException If an IO error occurs or the end of stream is
     *         reached.
     */
    public Token yylexNonNull(String error) throws IOException {
        Token t = yylex();
        if (t==null) {
            throw new EOFException(error);
        }
        return t;
    }


    /**
     * Returns the next token from the input stream, or throws an exception
     * if the end of stream is reached or if the token is not of a given
     * type.
     *
     * @param type The type the token must be.
     * @param error The error description for the exception if the end of
     *        stream is reached, or if the token is of an unexpected type.
     * @return The token.
     * @throws IOException If an IO error occurs or the end of stream is
     *         reached, or if the token is of the wrong type.
     */
    public Token yylexNonNull(int type, String error) throws IOException {
        return yylexNonNull(type, -1, error);
    }


    /**
     * Returns the next token from the input stream, or throws an exception
     * if the end of stream is reached or if the token is not of two given
     * types.
     *
     * @param type1 One type the token can be.
     * @param type2 Another type the token can be, or <code>-1</code> if we
     *        should only check against <code>type1</code>.
     * @param error The error description for the exception if the end of
     *        stream is reached, or if the token is of an unexpected type.
     * @return The token.
     * @throws IOException If an IO error occurs or the end of stream is
     *         reached, or if the token is of a wrong type.
     */
    public Token yylexNonNull(int type1, int type2, String error)
                                    throws IOException {
        return yylexNonNull(type1, type2, -1, error);
    }


    /**
     * Returns the next token from the input stream, or throws an exception
     * if the end of stream is reached or if the token is not of three given
     * types.
     *
     * @param type1 One type the token can be.
     * @param type2 Another type the token can be, or <code>-1</code> if we
     *        should only check against <code>type1</code>.
     * @param type3 Another type the token can be, or <code>-1</code> if we
     *        should only check against <code>type1</code> and
     *        <code>type2</code>.
     * @param error The error description for the exception if the end of
     *        stream is reached, or if the token is of an unexpected type.
     * @return The token.
     * @throws IOException If an IO error occurs or the end of stream is
     *         reached, or if the token is of a wrong type.
     */
    public Token yylexNonNull(int type1, int type2, int type3, String error)
                                    throws IOException {
        Token t = yylex();
        if (t==null) {
            throw new IOException(error);
        }
        int type = t.getType();
        boolean matches = type==type1 ||
                (type2!=-1 && type==type2) ||
                (type3!=-1 && type==type3);
        if (!matches) {
            throw new IOException(error + ", found '" + t.getLexeme() + "'");
        }
        return t;
    }


    /**
     * Returns the next token, but does not take it off of the stream.  This
     * is useful for lookahead.
     *
     * @return The next token, or <code>null</code> if the end of the stream
     *         has been reached.
     * @throws IOException If an IO error occurs.
     */
    public Token yyPeek() throws IOException {
        Token t = yylex();
        if (t!=null) {
            pushOntoStack(t);
        }
        return t;
    }


    /**
     * Returns the <code>depth</code>-th token, but does not take anything
     * off of the stream.  This is useful for lookahead.
     *
     * @param depth The token to peek at, from <code>1</code> forward.
     * @return The token, or <code>null</code> if that token index is past the
     *         end of the stream.
     * @throws IOException If an IO error occurs.
     * @throws IllegalArgumentException If <code>depth</code> is less than
     *         <code>1</code>.
     */
    public Token yyPeek(int depth) throws IOException {

        if (depth<1) {
            throw new IllegalArgumentException("depth must be >= 1");
        }

        Stack read = new Stack();
        Token result = null;
        for (int i=0; i<depth; i++) {
            result = yylex();
            if (result==null) { // Hit EOS before reaching the depth
                break;
            }
            read.push(result);
        }

        // Restore everything consumed, most recently read token first, so
        // the stream is left in its original order.
        while (!read.isEmpty()) {
            yyPushback((Token)read.pop());
        }

        return result;

    }


    /**
     * Peeks at and returns the type of the next token on the stream.
     *
     * @return The type of the next token, or <code>-1</code> if the end of
     *         stream has been reached.
     * @throws IOException If an IO error occurs.
     */
    public int yyPeekCheckType() throws IOException {
        Token t = yyPeek();
        return t!=null ? t.getType() : -1;
    }


    /**
     * Peeks at and returns the type of the specified token on the stream.
     *
     * @param index The index of the token to retrieve, from <code>1</code>
     *        forward.
     * @return The type of the token, or <code>-1</code> if the end of stream
     *         was reached first.
     * @throws IOException If an IO error occurs.
     */
    public int yyPeekCheckType(int index) throws IOException {
        Token t = yyPeek(index);
        return t!=null ? t.getType() : -1;
    }


    /**
     * Returns the next token, but does not take it off of the stream.  This
     * is useful for lookahead.
     *
     * @param error The error description for the exception if the end of
     *        stream is reached.
     * @return The next token.
     * @throws IOException If an IO error occurs, or if EOS is reached.
     */
    public Token yyPeekNonNull(String error) throws IOException {
        Token t = yyPeek();
        if (t==null) {
            throw new IOException(error);
        }
        return t;
    }


    /**
     * Returns the next token, but does not take it off of the stream.  This
     * is useful for lookahead.
     *
     * @param type The type the token must be.
     * @param error The error description for the exception if the end of
     *        stream is reached, or if the token is of an unexpected type.
     * @return The next token.
     * @throws IOException If an IO error occurs, or if EOS is reached, or
     *         if the token is not of the specified type.
     */
    public Token yyPeekNonNull(int type, String error) throws IOException {
        return yyPeekNonNull(type, -1, error);
    }


    /**
     * Returns the next token, but does not take it off of the stream.  This
     * is useful for lookahead.
     *
     * @param type1 One of the two types the token must be.
     * @param type2 The other of the two types the token must be, or
     *        <code>-1</code> to only check against <code>type1</code>.
     * @param error The error description for the exception if the end of
     *        stream is reached, or if the token is of an unexpected type.
     * @return The next token.
     * @throws IOException If an IO error occurs, or if EOS is reached, or
     *         if the token is not of the specified type.
     */
    public Token yyPeekNonNull(int type1, int type2, String error)
                                    throws IOException {
        return yyPeekNonNull(type1, type2, -1, error);
    }


    /**
     * Returns the next token, but does not take it off of the stream.  This
     * is useful for lookahead.
     *
     * @param type1 One of the three types the token must be.
     * @param type2 Another of the three types the token must be, or
     *        <code>-1</code> to ignore this parameter.
     * @param type3 The third of the types the token must be, or
     *        <code>-1</code> to ignore this parameter.
     * @param error The error description for the exception if the end of
     *        stream is reached, or if the token is of an unexpected type.
     * @return The next token.
     * @throws IOException If an IO error occurs, or if EOS is reached, or
     *         if the token is not of the specified type.
     */
    public Token yyPeekNonNull(int type1, int type2, int type3, String error)
                                    throws IOException {
        Token t = yyPeek();
        if (t==null) {
            throw new IOException(error);
        }
        int type = t.getType();
        boolean matches = type==type1 ||
                (type2!=-1 && type==type2) ||
                (type3!=-1 && type==type3);
        if (!matches) {
            throw new IOException(error + ", found '" + t.getLexeme() + "'");
        }
        return t;
    }


    /**
     * Pushes a token back onto the stream.  A <code>null</code> token is
     * ignored.
     *
     * @param t The token.
     */
    public void yyPushback(Token t) {
        if (t!=null) {
            pushOntoStack(t);
        }
    }


    /**
     * An offset backed by a Swing <code>Position</code>; the value it
     * reports is whatever the position currently reports.
     */
    private static class DocumentOffset implements Offset {

        private final Position pos;

        public DocumentOffset(Position pos) {
            this.pos = pos;
        }

        public int getOffset() {
            return pos.getOffset();
        }

    }

}