package dk.brics.jsparser;

import java.io.IOException;
import java.io.PushbackReader;

import dk.brics.jsparser.lexer.Lexer;
import dk.brics.jsparser.lexer.LexerException;
import dk.brics.jsparser.node.EOF;
import dk.brics.jsparser.node.TEndl;
import dk.brics.jsparser.node.TRbrace;
import dk.brics.jsparser.node.TRegexpLiteral;
import dk.brics.jsparser.node.TSemicolon;
import dk.brics.jsparser.node.TSlash;
import dk.brics.jsparser.node.Token;
import dk.brics.jsparser.parser.TokenIndex;

/**
 * Lexer that reacts to rejected tokens by either re-lexing a slash as a regular
 * expression literal or by emitting an automatically inserted semicolon in front
 * of the rejected token.
 *
 * <p>Warning: the logic is a small hand-written state machine spread over
 * {@link #errorOccurred()} and {@link #getToken()}; it is fragile and depends on
 * the exact order in which the flags below are updated.
 *
 * <p>NOTE(review): {@code errorOccurred()} is presumably invoked by the parser when
 * it cannot accept the current token, with {@code true} meaning "ask the lexer
 * again" -- confirm against the {@code Lexer}/parser base classes.
 */
public class SemicolonInsertingLexer extends Lexer {

    /** Value of {@link #previousWasEndl} as it was when the latest {@link #getToken()} call started. */
    private boolean previousPreviousWasEndl = false;

    /**
     * Set when a {@link TEndl} was skipped while fetching a token; cleared only when the
     * first token fetched from the underlying lexer is significant and not a regexp literal.
     */
    private boolean previousWasEndl = false;

    /** When set, the next {@link #getToken()} call returns an inserted semicolon. */
    private boolean insertSemicolonAsNext = false;

    /** Most recent real (non-inserted) token handed out by {@link #getToken()}. */
    private Token lastToken;

    /** Rejected token held back while the inserted semicolon is delivered first. */
    private Token bufferedToken;

    /** True from the moment a semicolon is inserted until a fresh token is read from the input. */
    private boolean hasInsertedSemicolon = false;

    /** Once set, this token is returned by every {@link #getToken()} call so errors point at it. */
    private Token errtoken;

    /** Last significant, non-regexp token seen first in a fetch; useful for debugging info. */
    private Token nonEndlToken;

    /** Visitor used by {@link #isIgnored(Token)} to look up a token's index. */
    TokenIndex converter = new TokenIndex();

    /**
     * Creates a lexer reading from the given input.
     *
     * @param in the reader passed on to the underlying lexer
     */
    public SemicolonInsertingLexer(PushbackReader in) {
        super(in);
    }

    /**
     * Attempts to recover from a rejected token.
     *
     * <p>If the offending token is a {@link TSlash}, it is treated as the start of a regexp
     * literal instead. Otherwise a semicolon is inserted in front of it, unless one was
     * already inserted at this position.
     *
     * <p>The regexp literal may itself become an offending token and then get a semicolon
     * inserted in front of it. No statement can start with a {@code TSlash} token, so there
     * is no need to try inserting a semicolon before the slash.
     *
     * @return {@code true} if lexer state was changed as part of a recovery attempt,
     *         {@code false} if nothing was changed
     * @throws IOException declared by the overridden method; not thrown by this implementation itself
     */
    @Override
    public boolean errorOccurred() throws IOException {
        if (errtoken != null) {
            // The offending token has already been pinned; nothing more to try.
            return false;
        }

        if (lastToken instanceof TSlash) {
            // Re-lex from the slash in regexp state and undo the endl bookkeeping
            // performed by the getToken() call that produced the slash.
            super.state = State.REGEXP;
            this.lastToken = null;
            super.token = null;
            this.previousWasEndl = this.previousPreviousWasEndl;
            return true;
        }

        boolean atInsertionPoint =
                previousWasEndl || lastToken instanceof TRbrace || lastToken instanceof EOF;
        if (atInsertionPoint && !hasInsertedSemicolon) {
            // Symbol was not accepted: hold it back and deliver a semicolon first.
            bufferedToken = lastToken;
            insertSemicolonAsNext = true;
            super.token = null;
            return true;
        }

        if (bufferedToken == null) {
            return false;
        }

        // A parser error will occur. Make sure the actual offending token is used
        // for error reporting instead of the inserted semicolon.
        errtoken = bufferedToken;
        super.token = null;
        return true;
    }

    /**
     * Tells whether the token index visitor leaves its index untouched for the given token.
     *
     * @param tok the token to classify
     * @return {@code true} if {@link #converter} did not assign an index to {@code tok}
     */
    private boolean isIgnored(Token tok) {
        converter.index = -1;
        tok.apply(converter);
        return converter.index == -1;
    }

    /**
     * Produces the next token: the pinned error token, a pending inserted semicolon, the
     * held-back token, or otherwise the next significant token from the underlying lexer.
     *
     * @return the next token
     * @throws IOException if the underlying lexer fails to read its input
     * @throws LexerException if the underlying lexer cannot tokenize the input
     */
    @Override
    protected Token getToken() throws IOException, LexerException {
        if (errtoken != null) {
            return errtoken;
        }

        // FIXME: Insert semicolon after return,break,continue,throw
        previousPreviousWasEndl = previousWasEndl;

        if (insertSemicolonAsNext) {
            return emitInsertedSemicolon();
        }
        if (bufferedToken != null) {
            return releaseBufferedToken();
        }
        return readSignificantToken();
    }

    /** Builds the automatically inserted semicolon and records that the insertion happened. */
    private Token emitInsertedSemicolon() {
        TSemicolon inserted = new TSemicolon();
        inserted.setAutomaticallyInserted(true);
        hasInsertedSemicolon = true;
        insertSemicolonAsNext = false;
        return inserted;
    }

    /** Hands out the held-back token; {@link #hasInsertedSemicolon} deliberately stays set. */
    private Token releaseBufferedToken() {
        Token released = bufferedToken;
        bufferedToken = null;
        lastToken = released;
        return released;
    }

    /**
     * Reads from the underlying lexer, skipping line breaks and ignored tokens while
     * remembering whether a line break was passed.
     */
    private Token readSignificantToken() throws IOException, LexerException {
        Token current = super.getToken();

        if (current instanceof TRegexpLiteral) {
            // Insert the missing slash and shift the position back to cover it.
            TRegexpLiteral literal = (TRegexpLiteral) current;
            literal.setText("/" + literal.getText());
            literal.setPos(literal.getPos() - 1);
        } else if (!isIgnored(current)) {
            previousWasEndl = false;
            nonEndlToken = current;
        }

        while (current instanceof TEndl || isIgnored(current)) {
            previousWasEndl |= current instanceof TEndl;
            current = super.getToken();
        }

        lastToken = current;
        hasInsertedSemicolon = false;
        super.state = State.NORMAL; // override sablecc state change
        return current;
    }
}