/*
 * Copyright 2008-2009 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

package org.visage.tools.antlr;

import java.util.HashMap;

import com.sun.tools.mjavac.code.Source;
import com.sun.tools.mjavac.util.Context;
import com.sun.tools.mjavac.util.List;
import com.sun.tools.mjavac.util.Log;
import com.sun.tools.mjavac.util.Name;
import com.sun.tools.mjavac.tree.JCTree;
import com.sun.tools.mjavac.util.Options;

import org.visage.tools.tree.VisageInterpolateValue;
import org.visage.tools.tree.VisageTree;
import org.visage.tools.tree.VisageBlock;
import org.visage.tools.tree.VisageErroneous;
import org.visage.tools.tree.VisageType;
import org.visage.tools.tree.VisageTreeInfo;
import org.visage.tools.tree.VisageTreeMaker;
import org.visage.tools.util.MsgSym;

import javax.tools.DiagnosticListener;

import org.antlr.runtime.*;

/**
 * Base class for ANTLR generated parsers.
 * This version incorporates the error reporting and recovery changes
 * enabled by using ANTLR 3.1.
 *
 * @author Robert Field
 * @author Jim Idle
 */
public abstract class AbstractGeneratedParserV4 extends Parser {

    /**
     * Create a new parser instance, pre-supplying the input token stream.
     * @param input The stream of tokens that will be pulled from the lexer
     */
    protected AbstractGeneratedParserV4(TokenStream input) {
        super(input);
    }

    /**
     * Create a new parser instance, pre-supplying the input token stream
     * and the shared state.
     * This is only used when a grammar is imported into another grammar.
     *
     * @param input The stream of tokens that will be pulled from the lexer
     * @param state The shared state object created by an interconnected grammar
     */
    protected AbstractGeneratedParserV4(TokenStream input, RecognizerSharedState state) {
        super(input, state);
    }

    /** The factory to be used for abstract syntax tree construction. */
    protected VisageTreeMaker F;

    /** The log to be used for error diagnostics. */
    protected Log log;

    /** The Source language setting. */
    protected Source source;

    /** The token id for white space. */
    protected int whiteSpaceToken = v4Parser.WS;

    /** Should the parser generate an end positions map? */
    protected boolean genEndPos;

    /** Should the parser preserve trees as much as possible (for IDE use)? */
    protected boolean preserveTrees;

    /**
     * The end positions map.
     * End positions are built by the parser such that each entry in the map
     * is keyed by a Visage tree node built by the parser, and the value is
     * the character position in the script source that corresponds to the end
     * position of the node.
     */
    HashMap<JCTree, Integer> endPositions;

    /**
     * The doc comments map.
     * The documentation comments are comments starting
     * with '/**'. Built by the parser, this map is keyed by the AST
     * node that a comment belongs to, and the value is the full text
     * of the comment, including the enclosing '/**' and comment end sequence.
     */
    HashMap<JCTree, String> docComments;

    /** Tree utility routines for the current compilation context. */
    private VisageTreeInfo treeInfo;

    /**
     * The name table.
     * Keeps track of all the identifiers discovered by the parser in any particular
     * context.
     */
    protected Name.Table names;

    /**
     * Local Visage tree node used to build an error node into the AST
     * when a syntax or semantic error is detected while parsing the
     * script. Error nodes are used by downstream tools such as IDEs
     * so that they can navigate source code even while it is not,
     * strictly speaking, valid code.
     */
    protected VisageErroneous errorNode = null;

    /**
     * Defines the human readable names of all the tokens that the lexer
     * can produce, for use by error messages and utilities that interact with
     * the user/author.
     */
    protected java.util.Map<String, String> tokenMap = new java.util.HashMap<String, String>();

    {
        tokenMap.put("ABSTRACT", "abstract");
        tokenMap.put("ASSERT", "assert");
        tokenMap.put("BIND", "bind");
        tokenMap.put("BOUND", "bound");
        tokenMap.put("BREAK", "break");
        tokenMap.put("CLASS", "class");
        tokenMap.put("CONTINUE", "continue");
        tokenMap.put("DELETE", "delete");
        tokenMap.put("FALSE", "false");
        tokenMap.put("FOR", "for");
        tokenMap.put("FUNCTION", "function");
        tokenMap.put("IF", "if");
        tokenMap.put("IMPORT", "import");
        tokenMap.put("INIT", "init");
        tokenMap.put("INSERT", "insert");
        tokenMap.put("LET", "let");
        tokenMap.put("NEW", "new");
        tokenMap.put("NOT", "not");
        tokenMap.put("NULL", "null");
        tokenMap.put("OVERRIDE", "override");
        tokenMap.put("PACKAGE", "package");
        tokenMap.put("POSTINIT", "postinit");
        tokenMap.put("PRIVATE", "private");
        tokenMap.put("PROTECTED", "protected");
        tokenMap.put("PUBLIC", "public");
        tokenMap.put("READONLY", "readonly");
        tokenMap.put("RETURN", "return");
        tokenMap.put("SUPER", "super");
        tokenMap.put("SIZEOF", "sizeof");
        tokenMap.put("STATIC", "static");
        tokenMap.put("THIS", "this");
        tokenMap.put("THROW", "throw");
        tokenMap.put("TRY", "try");
        tokenMap.put("TRUE", "true");
        tokenMap.put("VAR", "var");
        tokenMap.put("WHILE", "while");
        tokenMap.put("POUND", "#");
        tokenMap.put("LPAREN", "(");
        tokenMap.put("LBRACKET", "[");
        tokenMap.put("PLUSPLUS", "++");
        tokenMap.put("SUBSUB", "--");
        tokenMap.put("PIPE", "|");
        tokenMap.put("AFTER", "after");
        tokenMap.put("AND", "and");
        tokenMap.put("AS", "as");
        tokenMap.put("BEFORE", "before");
        tokenMap.put("CATCH", "catch");
        tokenMap.put("ELSE", "else");
        tokenMap.put("EXCLUSIVE", "exclusive");
        tokenMap.put("EXTENDS", "extends");
        tokenMap.put("FINALLY", "finally");
        tokenMap.put("FIRST", "first");
        tokenMap.put("FROM", "from");
        tokenMap.put("IN", "in");
        tokenMap.put("INDEXOF", "indexof");
        tokenMap.put("INSTANCEOF", "instanceof");
        tokenMap.put("INTO", "into");
        tokenMap.put("INVERSE", "inverse");
        tokenMap.put("LAST", "last");
        tokenMap.put("LAZY", "lazy");
        tokenMap.put("ON", "on");
        tokenMap.put("OR", "or");
        tokenMap.put("REPLACE", "replace");
        tokenMap.put("REVERSE", "reverse");
        tokenMap.put("STEP", "step");
        tokenMap.put("THEN", "then");
        tokenMap.put("TYPEOF", "typeof");
        tokenMap.put("WITH", "with");
        tokenMap.put("WHERE", "where");
        tokenMap.put("DOTDOT", "..");
        tokenMap.put("RPAREN", ")");
        tokenMap.put("RBRACKET", "]");
        tokenMap.put("SEMI", ";");
        tokenMap.put("COMMA", ",");
        tokenMap.put("DOT", ".");
        tokenMap.put("EQEQ", "==");
        tokenMap.put("EQ", "=");
        tokenMap.put("GT", ">");
tokenMap.put("LT", "<"); tokenMap.put("LTGT", "<>"); tokenMap.put("NOTEQ", "!="); tokenMap.put("LTEQ", "<="); tokenMap.put("GTEQ", ">="); tokenMap.put("PLUS", "+"); tokenMap.put("SUB", "-"); tokenMap.put("STAR", "*"); tokenMap.put("SLASH", "/"); tokenMap.put("PERCENT", "%"); tokenMap.put("PLUSEQ", "+="); tokenMap.put("SUBEQ", "-="); tokenMap.put("STAREQ", "*="); tokenMap.put("SLASHEQ", "/="); tokenMap.put("PERCENTEQ", "%="); tokenMap.put("COLON", ":"); tokenMap.put("QUES", "?"); tokenMap.put("DoubleQuoteBody", "double quote string literal"); tokenMap.put("SingleQuoteBody", "single quote string literal"); tokenMap.put("STRING_LITERAL", "string literal"); tokenMap.put("NextIsPercent", "%"); tokenMap.put("QUOTE_LBRACE_STRING_LITERAL", "\" { string literal"); tokenMap.put("LBRACE", "{"); tokenMap.put("RBRACE_QUOTE_STRING_LITERAL", "} \" string literal"); tokenMap.put("RBRACE_LBRACE_STRING_LITERAL", "} { string literal"); tokenMap.put("RBRACE", "}"); tokenMap.put("FORMAT_STRING_LITERAL", "format string literal"); tokenMap.put("TranslationKeyBody", "translation key body"); tokenMap.put("TRANSLATION_KEY", "translation key"); tokenMap.put("DECIMAL_LITERAL", "decimal literal"); tokenMap.put("Digits", "digits"); tokenMap.put("Exponent", "exponent"); tokenMap.put("TIME_LITERAL", "time literal"); tokenMap.put("OCTAL_LITERAL", "octal literal"); tokenMap.put("HexDigit", "hex digit"); tokenMap.put("HEX_LITERAL", "hex literal"); tokenMap.put("RangeDots", ".."); tokenMap.put("FLOATING_POINT_LITERAL", "floating point literal"); tokenMap.put("Letter", "letter"); tokenMap.put("JavaIDDigit", "java ID digit"); tokenMap.put("IDENTIFIER", "identifier"); tokenMap.put("WS", "white space"); tokenMap.put("COMMENT", "comment"); tokenMap.put("LINE_COMMENT", "line comment"); tokenMap.put("LAST_TOKEN", "last token"); } /** * An array of the human readable names of all the tokens the * lexer can provide to the parser. 
     *
     * This field should be accessed using the getVisageTokenNames method.
     * @see #getVisageTokenNames
     */
    protected String[] visageTokenNames = null;

    /**
     * Provides a human readable name for each of the parser grammar rules,
     * for use by error messages or any tool that interacts with the user/author.
     */
    protected String[][] ruleMap = {
        {"script", "the script contents"},
        {"scriptItems", "the script contents"},
        {"scriptItem", "the script contents"},
        {"modifers", "the modifiers for a declaration ('function', 'var', 'class', etc)"},
        {"modiferFlag", "an access modifier"},
        {"packageDecl", "a 'package' declaration"},
        {"importDecl", "an 'import' declaration"},
        {"importId", "an 'import' declaration"},
        {"classDefinition", "a 'class' declaration"},
        {"supers", "the 'extends' part of a 'class' declaration"},
        {"classMembers", "the members of a 'class' declaration"},
        {"classMember", "a 'class' declaration member"},
        {"functionDefinition", "a function declaration"},
        {"overrideDeclaration", "an overridden variable"},
        {"initDefinition", "an 'init' block"},
        {"postInitDefinition", "a 'postinit' block"},
        {"variableDeclaration", "a variable declaration"},
        {"formalParameters", "the parameters of a function declaration"},
        {"formalParameter", "a parameter"},
        {"block", "a block"},
        {"statement", "a statement"},
        {"onReplaceClause", "an 'on replace' clause"},
        {"paramNameOpt", "an optional parameter name"},
        {"paramName", "a parameter name"},
        {"variableLabel", "a variable declaration"},
        {"throwStatement", "a 'throw' statement"},
        {"whileStatement", "a 'while' statement"},
        {"insertStatement", "an 'insert' statement"},
        {"indexedSequenceForInsert", "an indexed sequence in an insert statement"},
        {"deleteStatement", "a 'delete' statement"},
        {"returnStatement", "a 'return' statement"},
        {"tryStatement", "a 'try' statement"},
        {"finallyClause", "a 'finally' clause"},
        {"catchClause", "a 'catch' clause"},
        {"boundExpression", "an expression"},
        {"expression", "an expression"},
        {"forExpression", "a 'for' statement or expression"},
        {"inClause", "the 'in' clause of a 'for' expression"},
        {"ifExpression", "an if statement or expression"},
        {"elseClause", "the 'else' clause of an 'if' expression"},
        {"assignmentExpression", "an assignment"},
        {"assignmentOpExpression", "an operator assignment expression"},
        {"assignOp", "an assignment operator"},
        {"andExpression", "an expression"},
        {"orExpression", "an expression"},
        {"typeExpression", "an expression"},
        {"relationalExpression", "an expression"},
        {"relOps", "a relational operator"},
        {"additiveExpression", "an expression"},
        {"arithOps", "an arithmetic operator"},
        {"multiplicativeExpression", "an expression"},
        {"multOps", "an arithmetic operator"},
        {"unaryExpression", "an expression"},
        {"unaryOps", "a unary operator"},
        {"suffixedExpression", "an expression"},
        {"postfixExpression", "an expression"},
        {"primaryExpression", "an expression"},
        {"keyFrameLiteralPart", "a frame value expression"},
        {"functionExpression", "an anonymous function definition"},
        {"newExpression", "a 'new' expression"},
        {"objectLiteral", "an object literal definition"},
        {"objectLiteralPart", "a member of an object literal"},
        {"objectLiteralInit", "an object literal initializer"},
        {"stringExpression", "a string expression"},
        {"strCompoundElement", "a compound string element"},
        {"stringLiteral", "a string literal"},
        {"qlsl", "a compound string element"},
        {"stringExpressionInner", "an embedded string expression"},
        {"stringFormat", "a string formatting specification"},
        {"bracketExpression", "a sequence creation expression"},
{"expressionList", "a list of expressions"}, {"expressionListOpt", "an optional list of expressions"}, {"type", "a type specification"}, {"typeArgList", "a type specification"}, {"typeArg", "a type specification"}, {"typeReference", "a type specification"}, {"cardinality", "a type specification"}, {"typeName", "a type specification"}, {"genericArgument", "a type specification"}, {"literal", "a literal constant"}, {"qualname", "a qualified identifier"}, {"identifier", "an identifier"}, {"identifierAll", "an identifier"}, {"name", "an identifier"}, {"nameAll", "an identifier"}, }; /** * Initializes a new instance of GeneratedParser */ protected void initialize(Context context) { this.F = (VisageTreeMaker)VisageTreeMaker.instance(context); this.log = Log.instance(context); this.names = Name.Table.instance(context); this.source = Source.instance(context); Options options = Options.instance(context); this.genEndPos = options.get("-Xjcov") != null || context.get(DiagnosticListener.class) != null || Boolean.getBoolean("VisageModuleBuilder.debugBadPositions"); this.preserveTrees = options.get("preserveTrees") != null; this.treeInfo = (VisageTreeInfo) VisageTreeInfo.instance(context); } /** * Using the supplied grammar rule name, search the rule map * and return a user friendly description of the what the * rule indicates we must have been parsing at the time of * error. * * @param ruleName The grammar rule name as supplied by ANTLR error routines * @return Friendly form of the rule name for use in messages */ protected String stackPositionDescription(String ruleName) { // optimize for the non-error case: do sequential search // for (String[] pair : ruleMap) { if (pair[0].equals(ruleName)) { // We found a rule name that matched where we are on the stack // so we can use the description associated with it. // return pair[1]; } } // If here then we did not suppyl a specific description // for this rule, so we attempt to formulate it into something // readable by humans. We wplit the rule name on camel case // and predict if this is 'an' or 'a' // StringBuffer sb = new StringBuffer(ruleName.length()+1); switch (ruleName.charAt(0)) { case 'a': case 'e': case 'i': case 'o': case 'u': sb.append("an "); break; default: sb.append("a "); break; } for (char ch : ruleName.toCharArray()) { if (Character.isUpperCase(ch)) { sb.append(' '); sb.append(Character.toLowerCase(ch)); } else { sb.append(ch); } } return sb.toString(); } /** * A translation matrix for converting a particular token classification * into a human readable description. */ protected enum TokenClassification { KEYWORD { String forHumans() { return "a keyword"; } }, DEPRECATED_KEYWORD { String forHumans() { return "a no longer supported keyword"; } }, OPERATOR { String forHumans() { return "an operator"; } }, IDENTIFIER { String forHumans() { return "an identifier"; } }, PUNCTUATION { String forHumans() { return "a punctuation character"; } }, UNKNOWN { String forHumans() { return "a token"; } }; abstract String forHumans(); }; /** * */ protected TokenClassification[] tokenClassMap = new TokenClassification[v4Parser.LAST_TOKEN + 1]; /** * Initializer is used to initalize our token class map, which tells * error messages and so on how to describe the token to human beings. */ { // First, set all the token types to UNKNOWN. 
        // LAST_TOKEN is an artificial
        // token generated by the parser so that it is assigned a token number
        // higher than all the lexer defined tokens and we can use it as the size.
        //
        for (int index = 0; index <= v4Parser.LAST_TOKEN; index += 1) {
            tokenClassMap[index] = TokenClassification.UNKNOWN;
        }

        // Now set the types ourselves, leaving anything we don't know about yet
        // to show up as UNKNOWN.
        // If a token is removed from the grammar, the corresponding initialization
        // will fail to compile (which is the earliest we could detect the problem).
        //
        // Keywords:
        //
        tokenClassMap[v4Parser.ABSTRACT] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.ASSERT] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.BIND] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.BOUND] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.BREAK] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.CLASS] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.CONTINUE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.DELETE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.FALSE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.FOR] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.FUNCTION] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.IF] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.IMPORT] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.INIT] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.INSERT] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.DEF] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.NEW] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.NOT] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.NULL] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.OVERRIDE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.PACKAGE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.POSTINIT] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.PRIVATE] = TokenClassification.DEPRECATED_KEYWORD;
        tokenClassMap[v4Parser.PROTECTED] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.PUBLIC] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.RETURN] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.SUPER] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.SIZEOF] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.STATIC] = TokenClassification.DEPRECATED_KEYWORD;
        tokenClassMap[v4Parser.THIS] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.THROW] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.TRY] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.TRUE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.VAR] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.WHILE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.AFTER] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.AND] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.AS] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.BEFORE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.CATCH] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.ELSE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.EXCLUSIVE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.EXTENDS] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.FINALLY] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.FIRST] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.FROM] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.IN] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.INDEXOF] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.INSTANCEOF] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.INTO] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.INVERSE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.LAST] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.LAZY] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.ON] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.OR] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.REPLACE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.REVERSE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.STEP] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.THEN] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.TYPEOF] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.WITH] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.WHERE] = TokenClassification.KEYWORD;
        tokenClassMap[v4Parser.TWEEN] = TokenClassification.KEYWORD;

        // Operators:
        //
        tokenClassMap[v4Parser.PLUSPLUS] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.SUBSUB] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.PIPE] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.DOTDOT] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.DOT] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.EQEQ] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.EQ] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.GT] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.LT] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.LTGT] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.NOTEQ] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.LTEQ] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.GTEQ] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.PLUS] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.SUB] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.STAR] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.SLASH] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.PERCENT] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.PLUSEQ] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.SUBEQ] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.STAREQ] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.SLASHEQ] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.PERCENTEQ] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.QUES] = TokenClassification.OPERATOR;
        tokenClassMap[v4Parser.SUCHTHAT] = TokenClassification.OPERATOR;

        // Punctuation/syntactic sugar:
        //
        tokenClassMap[v4Parser.COLON] = TokenClassification.PUNCTUATION;
        tokenClassMap[v4Parser.RPAREN] = TokenClassification.PUNCTUATION;
        tokenClassMap[v4Parser.RBRACKET] = TokenClassification.PUNCTUATION;
        tokenClassMap[v4Parser.SEMI] = TokenClassification.PUNCTUATION;
        tokenClassMap[v4Parser.COMMA] = TokenClassification.PUNCTUATION;
        tokenClassMap[v4Parser.POUND] = TokenClassification.PUNCTUATION;
        tokenClassMap[v4Parser.LPAREN] = TokenClassification.PUNCTUATION;
        tokenClassMap[v4Parser.LBRACKET] = TokenClassification.PUNCTUATION;

        // Others:
        //
        tokenClassMap[v4Parser.IDENTIFIER] = TokenClassification.IDENTIFIER;
    }

    /**
     * Returns the classification (OPERATOR, PUNCTUATION, etc.) of the
     * supplied token.
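     * <p>
     * For example, a PLUS token is classified as OPERATOR by the map above,
     * so {@code classifyToken(t).forHumans()} yields "an operator".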
     * @param t The token to classify
     * @return The token classification
     */
    private TokenClassification classifyToken(Token t) {

        // Ask ANTLR what the type is
        //
        int tokenType = t.getType();

        // And work out what we have
        //
        return classifyToken(tokenType);
    }

    /**
     * Returns the classification (OPERATOR, PUNCTUATION, etc.) of the
     * supplied token type.
     * @param tokenType The token type to classify
     * @return The token classification
     */
    private TokenClassification classifyToken(int tokenType) {

        // Assume that we don't know what this token is
        //
        TokenClassification result = TokenClassification.UNKNOWN;

        // And if it is within the range that we know about, then
        // return the classification that we hard coded.
        //
        if ((tokenType >= 0) && tokenType < tokenClassMap.length) {
            result = tokenClassMap[tokenType];
        }
        return result;
    }

    /**
     * Returns the parser name, which is really only useful for debugging scenarios.
     * @return The name of the parser class
     */
    protected String getParserName() {
        return this.getClass().getName();
    }

    /**
     * Using the given exception generated by the parser, produce an error
     * message string that is geared towards the Visage script author/user.
     * @param e The exception generated by the parser.
     * @param tokenNames The names of the tokens as generated by ANTLR (unused by this method).
     * @return The human readable error message string.
     */
    @Override
    public String getErrorMessage(RecognitionException e, String[] tokenNames) {

        // The rule invocation stack tells us where we are in terms of the
        // LL parse and the path through the rules that got us to this point.
        //
        java.util.List stack = getRuleInvocationStack(e, getParserName());

        // The top of the stack is the rule that actually generated the
        // exception.
        //
        String stackTop = stack.get(stack.size() - 1).toString();

        // Now we know where we are, we can pick out the human oriented
        // description of what we were trying to parse.
        //
        String posDescription = stackPositionDescription(stackTop);

        // Where we will build the error message string
        //
        StringBuffer mb = new StringBuffer();

        // Rather than just send a diagnostic message containing only a
        // start position, we really want to create an error-spanning
        // Erroneous node. The recipient, such as the IDE, can then nicely underline
        // the token(s) that are in error. So we calculate a start and end position
        // to create an erroneous node at the end of this method, defaulting both
        // to the current position.
        //
        int ep = pos() + 1;
        int sp = pos();

        // The exact error message we construct depends on the
        // exception type that was generated. We will be given one of
        // the following exceptions:
        //
        // UnwantedTokenException   - There was an extra token in the stream that
        //                            we can see was extra because the next token after it
        //                            is the one that would have matched correctly.
        //
        // MissingTokenException    - There was a missing token in the stream that we see
        //                            was missing because the token we actually saw was one
        //                            that is a member of the follow set had the token been
        //                            present.
        //
        // MismatchedTokenException - The token we received was not one we were expecting, but
        //                            we could neither identify a missing token that would have made it
        //                            something we can deal with, nor that it was just an
        //                            accidental extra token that we can throw away. Something like
        //                            A B C D, where we got to B but the token we got was neither
        //                            C, D, nor anything following.
        //
        // NoViableAltException     - The token we saw isn't predicted by any alternative
        //                            path available at this point in the current rule.
        //                            Something like: ...
        //                            (B|C|D|E), but we got Z, which does
        //                            not follow from anywhere.
        //
        // EarlyExitException       - The parser wants one or more of some construct, but there
        //                            were none at all in the input stream. Something like
        //                            X SEMI+
        //
        // MismatchedSetException   - The parser would have accepted any one of two or more
        //                            tokens, but the actual token was not in that set and
        //                            was not a token that we could determine was spurious or
        //                            from which we could determine that we just had a token missing.
        //
        // Other exceptions, and some of the above, are dealt with as generic RecognitionExceptions.
        //
        // The lead-in is always the same apology.
        //
        mb.append("Sorry, I was trying to understand ");
        mb.append(posDescription);

        if (e instanceof UnwantedTokenException) {

            // We had an extraneous token in the stream, so we have discarded it
            // for error recovery but still need to report it.
            //
            UnwantedTokenException ute = (UnwantedTokenException) e;
            CommonToken uwt = (CommonToken) ute.getUnexpectedToken();

            // Inveigh about the extra token
            //
            mb.append(" but I got confused when I found an extra ");
            mb.append(getTokenErrorDisplay(uwt));

            TokenClassification tokenClass = classifyToken(e.token);

            // Don't ramble by repeating things like "...extra identifier, which is an identifier that should not be there"
            //
            if (tokenClass != TokenClassification.UNKNOWN
                    && tokenClass != TokenClassification.OPERATOR
                    && !(posDescription.equalsIgnoreCase(tokenClass.forHumans()))) {
                mb.append(" which is ");
                mb.append(tokenClass.forHumans());
            }
            mb.append(" that should not be there");

            // Work out what our start and end points should be for the error. When we have an extra
            // token in this language, it is quite often because the source code is coming from
            // the NetBeans (or other) IDE and the user is typing some new definition, viz:
            //
            //    var
            //    var answer : Integer = 42;
            //
            // In such a case, we would throw the error at the second instance of var, but
            // it is more useful for the IDE if we throw the error at the first instance
            // (for various reasons). Hence we do a check here to see if the prior token is the
            // same type as the current token. If it is, then we report the error with
            // reference to the prior token. Note that we have already consumed the token
            // when we get here, because this is an error that is not sent back to the parser,
            // it is just auto-recovered, so we need to use LA(-2) here.
            //
            if (uwt.getType() == input.LA(-2)) {

                // Replace the token with the previous token
                //
                uwt = (CommonToken) (input.LT(-2));
                ute.token = uwt;
            }
            sp = uwt.getStartIndex();
            ep = uwt.getStopIndex() + 1;

        } else if (e instanceof MissingTokenException) {

            // We were able to work out that there was just a single token missing
            // and need to report it like that.
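            // (For instance, for the hypothetical input "var x : Integer 3",
            //  a single '=' can be seen to be missing before '3', and the
            //  message below names the token that should be inserted.)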
            //
            MissingTokenException mte = (MissingTokenException) e;

            // Say what we think is missing
            //
            mb.append(" but I got confused because ");

            TokenClassification tokenClass = classifyToken(mte.expecting);

            if (posDescription.equalsIgnoreCase(tokenClass.forHumans())) {
                mb.append("you seem to have omitted this");
            } else if (mte.expecting == Token.EOF) {
                mb.append("I was looking for the end of the script here");
            } else {
                mb.append("you seem to have missed out '");
                mb.append(tokenNames[mte.expecting]);
                mb.append("'");
                if (tokenClass != TokenClassification.UNKNOWN
                        && tokenClass != TokenClassification.OPERATOR
                        && !posDescription.equalsIgnoreCase(tokenClass.forHumans())) {
                    mb.append(" which is ");
                    mb.append(tokenClass.forHumans());
                }
                mb.append(" that should be there");
            }

            // The token is missing, so we want to use the char position directly
            // after the previous token and just make it a single character long.
            // This will be the insert point for the missing token, whatever is
            // actually at that position.
            //
            sp = semiPos();
            ep = sp + 1;

        } else if (e instanceof MismatchedTokenException) {

            MismatchedTokenException mte = (MismatchedTokenException) e;
            TokenClassification tokenClass = classifyToken(e.token);

            mb.append(" but I got confused when I ");

            if (mte.token.getType() == Token.EOF) {
                mb.append("hit the end of the script.");

                // The start and end points come directly from the end of the prior token
                //
                sp = semiPos();
                ep = sp + 1;
            } else {
                mb.append("saw ");
                mb.append(getTokenErrorDisplay(e.token));

                if (tokenClass != TokenClassification.UNKNOWN
                        && tokenClass != TokenClassification.OPERATOR
                        && !posDescription.equalsIgnoreCase(tokenClass.forHumans())) {
                    mb.append(" which is ");
                    mb.append(tokenClass.forHumans());
                }

                // The start and end points come directly from the mismatched token.
                //
                sp = ((CommonToken) mte.token).getStartIndex();
                ep = ((CommonToken) mte.token).getStopIndex() + 1;
            }

            if (tokenClass == TokenClassification.KEYWORD && mte.expecting == v4Parser.IDENTIFIER) {
                mb.append(".\n Perhaps you tried to use a keyword as the name of a variable (use <<keyword>> if you need to do this)");
            } else if (mte.expecting != Token.EOF) {
                mb.append(".\n Perhaps you are missing a ");
                mb.append("'" + tokenNames[mte.expecting] + "'");
            } else {
                mb.append(".\n I was looking for the end of the script here");
            }

        } else if (e instanceof NoViableAltException) {

            NoViableAltException nvae = (NoViableAltException) e;
            TokenClassification tokenClass = classifyToken(e.token);

            mb.append(" but I got confused when I ");

            if (nvae.token.getType() == Token.EOF) {
                mb.append("hit the end of the script.");

                // The start and end points come directly from the end of the prior token
                //
                sp = semiPos();
                ep = sp + 1;
            } else {
                mb.append("saw ");
                mb.append(getTokenErrorDisplay(e.token));

                if (tokenClass != TokenClassification.UNKNOWN
                        && tokenClass != TokenClassification.OPERATOR
                        && !posDescription.equalsIgnoreCase(tokenClass.forHumans())) {
                    mb.append(" which is ");
                    mb.append(tokenClass.forHumans());
                }

                if (tokenClass == TokenClassification.KEYWORD
                        && (stackTop.equals("name") || stackTop.equals("identifier"))) {
                    mb.append(".\n Perhaps you tried to use a keyword as the name of a variable (use <<keyword>> if you need to do this)");
                }

                // The start and end points come directly from the mismatched token.
                //
                sp = ((CommonToken) nvae.token).getStartIndex();
                ep = ((CommonToken) nvae.token).getStopIndex() + 1;
            }

        } else if (e instanceof MismatchedSetException) {

            MismatchedSetException mse = (MismatchedSetException) e;

            mb.append(" but I got confused when I saw ");
            mb.append(getTokenErrorDisplay(e.token));

            TokenClassification tokenClass = classifyToken(e.token);

            if (tokenClass != TokenClassification.UNKNOWN
                    && tokenClass != TokenClassification.OPERATOR
                    && !posDescription.equalsIgnoreCase(tokenClass.forHumans())) {
                mb.append(" which is ");
                mb.append(tokenClass.forHumans());
            }
            mb.append(".\n I was looking for one of: " + mse.expecting);

            // The start and end points come directly from the mismatched token.
            //
            sp = ((CommonToken) e.token).getStartIndex();
            ep = ((CommonToken) e.token).getStopIndex() + 1;

        } else {

            // The start and end points come directly from the mismatched token.
            //
            sp = ((CommonToken) e.token).getStartIndex();
            ep = ((CommonToken) e.token).getStopIndex() + 1;

            mb.append(super.getErrorMessage(e, tokenNames));
        }

        // Having constructed the error string, and decided on our start
        // and end points, we need to create an erroneous node, which we will
        // eventually supply within the AST, but will also use for logging
        // the error message, so that the diagnostic positions are useful to
        // anyone listening to the diagnostics.
        //
        errorNode = F.at(sp).Erroneous();
        endPos(errorNode, ep);

        // Give back the string
        //
        return mb.toString();
    }

    /**
    public String getTokenErrorDisplay(Token t) {
        return t.toString();
    }
    **/

    /**
     * Creates the error/warning message that we need to show users/IDEs when
     * ANTLR has found a parsing error, has recovered from it, and is now
     * telling us that a parsing exception occurred.
     *
     * We call our own override of getErrorMessage, and this builds
     * a string that is geared towards the Visage author. Then we work out
     * where we are in the character stream and record the error using the
     * Visage infrastructure.
     */
    @Override
    public void displayRecognitionError(String[] tokenNames, RecognitionException e) {

        // Now we build the appropriate error message
        //
        String msg = getErrorMessage(e, getVisageTokenNames(tokenNames));

        // And record the information using the Visage error sink and the
        // DiagnosticPosition interface of the errorNode, which is created
        // by the getErrorMessage call.
        //
        log.error(errorNode, MsgSym.MESSAGE_VISAGE_GENERALERROR, msg);
    }

    /**
     * Creates the error/warning message that we need to show users/IDEs when
     * ANTLR has found a parsing error, has recovered from it, and is now
     * telling us that a parsing exception occurred.
     *
     * We call our own override of getErrorMessage, and this builds
     * a string that is geared towards the Visage author. Then we work out
     * where we are in the character stream and record the error using the
     * Visage infrastructure.
     */
    public void displayRecognitionError(String[] tokenNames, RecognitionException e, VisageTree node) {

        // Now we build the appropriate error message
        //
        String msg = getErrorMessage(e, getVisageTokenNames(tokenNames));

        // And record the information using the Visage error sink and the
        // DiagnosticPosition interface of the supplied node.
        //
        log.error(node, MsgSym.MESSAGE_VISAGE_GENERALERROR, msg);
    }

    /**
     * Provides a reference to the array of human readable descriptions
     * of each token that the lexer can generate.
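     * <p>
     * For example, the ANTLR name "LPAREN" is presented as "(", while any
     * name with no entry in the token map is passed through unchanged.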
     * @param tokenNames The names of the tokens as ANTLR sees them
     * @return An array of human readable descriptions indexed by the ANTLR generated token type (integer)
     */
    protected String[] getVisageTokenNames(String[] tokenNames) {

        // If we have already generated this array, then we just return the
        // reference to it.
        //
        if (visageTokenNames != null) {
            return visageTokenNames;
        } else {

            // This is the first request for the array, so we build it
            // on the fly.
            //
            visageTokenNames = new String[tokenNames.length];
            int count = 0;

            for (String tokenName : tokenNames) {
                String visageTokenName = tokenMap.get(tokenName);
                if (visageTokenName == null) {
                    visageTokenNames[count] = tokenName;
                } else {
                    visageTokenNames[count] = visageTokenName;
                }
                count++;
            }
            return visageTokenNames;
        }
    }

    /**
     * Calculates the current character position in the input stream.
     * This method skips whitespace tokens by virtue of using LT(1),
     * which automatically skips off-channel tokens. Use when no token
     * has yet been examined in a rule.
     *
     * @return The character position of the next non-whitespace token in the input stream
     */
    protected int pos() {
        return pos(input.LT(1));
    }

    /**
     * Calculates the character position of the first character of the text
     * in the input stream that the supplied token represents.
     * @param tok The token to locate in the input stream
     * @return The character position of the start of the supplied token
     */
    protected int pos(Token tok) {
        return ((CommonToken) tok).getStartIndex();
    }

    /**
     * Calculates the position in the character stream where a missing
     * semi-colon looks like it ought to have been.
     *
     * The method is called from the rule that detects that there should have been
     * a semi-colon to terminate a statement or expression, hence the input
     * stream will be positioned too far ahead of the position we are looking to report.
     * To find where we should report, we need to search backwards in the input stream for
     * the first non-hidden token before the current one, then position after the end of
     * the text that that token represents.
     *
     * @return The character position just past the previous on-channel token
     */
    protected int semiPos() {

        CommonToken tok;

        // Traverse backwards until we find a token that is on the default
        // channel.
        //
        tok = (CommonToken) (input.LT(-1));

        // If the source consists of just one token, say 'function', then
        // we can actually end up positioned at the first token; we get null back
        // if this happens and use the current token instead.
        //
        if (tok == null) {
            tok = (CommonToken) (input.LT(1));
        }

        // Just in case something goes wrong getting ANY token, check for null
        //
        if (tok == null) {
            return 0;
        }

        // Now, all we need to do is position after the last character of the
        // text that this token represents.
        //
        return tok.getStopIndex() + 1;
    }

    /**
     * Associate a documentation comment with a particular AST node.
     *
     * The parser keeps a map of all the AST fragments which it has
     * identified as having a documentation comment. This is the
     * method that creates and builds that map as the parser rules
     * discover the associations.
     *
     * @param tree The tree or tree fragment with which the documentation comment should be associated.
     * @param comment The comment that has been identified as the documentation comment for this tree.
     */
    void setDocComment(JCTree tree, CommonToken comment) {
        if (comment != null) {
            if (docComments == null) {
                docComments = new HashMap<JCTree, String>();
            }
            docComments.put(tree, comment.getText());
        }
    }

    /**
     * Given a specific starting token, locate the first non-whitespace token
     * that precedes it, returning it if it is a comment.
     *
     * A number of syntactic constructs can be preceded by a documentation COMMENT, which
     * is associated with the construct and should be placed in the AST. Such comments
     * must begin with the introducer '/**'.
     * This method scans backwards from the supplied token until it finds a token that is
     * not considered to be WHITESPACE. If that token is a qualifying COMMENT, then it is
     * deemed to belong to the construct that asked to locate the comment and is
     * returned to the caller.
     *
     * @param start The token from which to search backwards in the token stream.
     * @return null if there is no associated comment; otherwise the token that
     *         contains the comment.
     */
    protected CommonToken getDocComment(Token start) {

        // Locate the position of the token before this one in the input stream
        //
        int index = start.getTokenIndex() - 1;

        // Loop backwards through the token stream until
        // we find a token that is not considered to be whitespace,
        // or we reach the start of the token stream.
        //
        while (index >= 0) {
            Token tok = input.get(index);
            int type;

            // Because modifiers are dealt with uniformly now, we must ignore
            // them when running backwards looking for comments.
            //
            type = tok.getType();
            if (   type == v4Parser.WS
                || type == v4Parser.ABSTRACT
                || type == v4Parser.BOUND
                || type == v4Parser.DEFAULT
                || type == v4Parser.OVERRIDE
                || type == v4Parser.PACKAGE
                || type == v4Parser.PROTECTED
                || type == v4Parser.PUBLIC
                || type == v4Parser.PUBLIC_READ
                || type == v4Parser.PUBLIC_INIT
                || type == v4Parser.MIXIN

                //TODO: deprecated -- remove this at some point
                //
                || type == v4Parser.STATIC

                || type == v4Parser.LINE_COMMENT
                || type == v4Parser.COMMENT) {
                --index;
            } else {
                break;
            }
        }

        // Assuming that we have found a valid token (not reached the
        // start of the token stream), check to see if that token is a DOC_COMMENT
        // and return null if it is not.
        //
        if (index < 0 || input.get(index).getType() != v4Parser.DOC_COMMENT) {
            return null;
        }

        // We have a documentation comment, rather than just a normal comment.
        //
        return (CommonToken) (input.get(index));
    }

    /**
     * Given a list of interpolation values, create an entry for the supplied AST node
     * in the end position map, using the end position of the last AST node in the
     * interpolation value list.
     *
     * @param tree The AST node that we wish to create an end position for
     * @param list A list of interpolation value AST nodes.
     */
    void endPos(JCTree tree, com.sun.tools.mjavac.util.List<VisageInterpolateValue> list) {
        if (genEndPos) {
            int endLast = endPositions.get(list.last());
            endPositions.put(tree, endLast);
        }
    }

    /**
     * Using the current token stream position as the start point,
     * search back through the input token stream and set the end point
     * of the supplied tree object to the first non-whitespace token
     * we find.
     *
     * Note that this version of endPos() is called when all elements of a
     * construct have been parsed. Hence we traverse back from one token
     * before the current index.
     */
    void endPos(JCTree tree) {

        CommonToken tok;

        // Unless we are at the very start, the token that
        // ended whatever AST fragment we are constructing was the
        // one before the one at the current index, and so we need
        // to start at that token.
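        // (Illustration with a hypothetical input: after the tokens of
        //  'x + 1' have been consumed, LT(-1) is the token '1', and its
        //  stop index + 1 becomes the end position recorded for the node.)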
        //
        tok = (CommonToken) (input.LT(-1));

        if (tok == null) {

            // This can happen if the first thing is a script member
            // declaration and it has no modifiers; the modifiers are then
            // starting at 0 and ending at 0.
            //
            tok = (CommonToken) (input.LT(1));
            endPos(tree, tok.getStartIndex());
        } else {

            // We have found a token that is non-whitespace and is not BOF
            //
            endPos(tree, tok.getStopIndex() + 1);
        }
    }

    /**
     * Create the end position map entry for the given JCTree at the supplied
     * character index, which is the offset into the script source.
     *
     * @param tree The tree for which we are mapping the endpoint
     * @param end The character position in the input stream that matches the end of the tree
     */
    void endPos(JCTree tree, int end) {

        // Check that we are not trying to create an endPos that is before the
        // start of the tree. This can happen if we were in error recovery mode after
        // a missing element, and ended up taking the end position of the token
        // in the stream prior to the place where the missing element should be. In that
        // case we are creating an erroneous node and it will be empty, so it gets
        // an end position one character past its start position.
        //
        if (tree != null) {
            int start = tree.getStartPosition();
            if (end <= start) end = start + 1;
            if (tree instanceof VisageBlock)
                ((VisageBlock) tree).endpos = end;
            if (genEndPos) {
                endPositions.put(tree, end);
            }
        }
    }

    /**
     * @return An empty list of Visage trees.
     */
    protected List noVisageTrees() {
        return List.<VisageTree>nil();
    }

    /**
     * Examines the token stream to see whether we require a SEMI
     * token to terminate the previous statement, or we do not.
     *
     * The rules for deciding whether a SEMI is required here or
     * not are reasonably straightforward:
     *
     * 1) If the next token is a '}' then we do not require a SEMI,
     *    as the last statement of a block does not need to terminate
     *    with a SEMI;
     *
     * 2) If the next token is EOF, then we do not require a SEMI, as
     *    the last statement of the script does not require termination;
     *
     * 3) If the previous token was a '}' then we do not require a SEMI,
     *    as brace blocks do not ever require termination;
     *
     * 4) If the previous token was itself a SEMI, then we assume that
     *    the prior statement was terminated correctly;
     *
     * 5) If the next token is ELSE, then the prior single statement
     *    of a then clause does not require a SEMI.
     *    For instance: if (x) a else b;
     *
     * Note that we always consume a SEMI here if there is one,
     * as there is never any harm in having too many SEMIs.
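     *
     * For example (illustrative snippet only), no SEMI is required after
     * g() or after h():
     * <pre>
     *    function f() { g() }     // rule 1: next token is '}'
     *    h()                      // rule 2: next token is EOF
     * </pre>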
     */
    protected void checkForSemi() {

        Token nextTok = input.LT(1);
        int nextTokType = nextTok.getType();

        //System.out.println("Check " + contextMessage);
        //System.out.println("  next token is '" + nextTok.getText() + "'");
        //System.out.println("  previous token is '" + input.LT(-1).getText() + "'");

        // If there is a SEMI colon next anyway, then we just eat it
        //
        if (nextTokType == v4Parser.SEMI) {

            // Just consume it and return
            //
            input.consume();
            return;
        }

        // Ignore if the next token is something that relaxes the rules
        //
        if (   nextTokType == v4Parser.RBRACE
            || nextTokType == Token.EOF
            || nextTokType == v4Parser.ELSE
            || nextTokType == v4Parser.RBRACE_LBRACE_STRING_LITERAL
            || nextTokType == v4Parser.RBRACE_QUOTE_STRING_LITERAL
           ) {

            // The SEMI was optional anyway, so just return
            //
            return;
        }

        // Now we need to know the previous on-channel token
        //
        Token prevToken = input.LT(-1);

        if (   prevToken == null
            || prevToken.getType() == v4Parser.RBRACE
            || prevToken.getType() == v4Parser.SEMI
           ) {

            // We don't require a SEMI after a '}' or after a prior SEMI, or if
            // this error occurred on the first token (in which case prevToken is null)
            //
            return;
        }

        // OK, having got here, we must require a SEMI and it is missing,
        // so issue the error.
        //
        log.error(semiPos(), MsgSym.MESSAGE_VISAGE_SEMI_REQUIRED);
    }

    /**
     * If the parser is able to recover from the fact that a single token
     * is missing from the input stream, then it will call this method
     * to manufacture a token for use by actions in the grammar.
     *
     * In general, the tokens we will need to manufacture here will be things
     * like identifiers, missing parens and braces, and other fairly simple
     * constructs, as these can be recognized from the union of follow sets
     * that can be constructed at any one point.
     *
     * @param input The token stream where we are normally drawing tokens from
     * @param e The exception that was raised by the parser
     * @param expectedTokenType The type of the token that the parser was expecting to see next
     * @param follow The follow set of tokens that can follow on from here
     * @return A newly manufactured token of the required type
     */
    @Override
    protected Object getMissingSymbol(IntStream input,
                                      RecognitionException e,
                                      int expectedTokenType,
                                      BitSet follow) {

        // Used to manufacture the token that we will insert into
        // the input stream
        //
        MissingCommonToken t;

        // The token string contents, so that we can make up some sensible
        // error value.
        //
        String tokenText;

        // Pick up the prior token (the one we will return after this
        // manufactured one), and use it to generate position information
        // for our fake token.
        //
        CommonToken current = (CommonToken) ((TokenStream) input).LT(-1);

        // If there was no prior token, then we use the next
        // token as a reference point.
        //
        if (current.getType() == Token.EOF) {
            current = (CommonToken) ((TokenStream) input).LT(1);
        }

        // Work out what type of token we were expecting, so we can
        // use it in the token text if we need to.
        //
        TokenClassification tokenClass = classifyToken(expectedTokenType);

        // When we are manufacturing a token for error recovery, we must intercept
        // a number of token types and create something that can be sensibly used
        // by the Visage AST to indicate that it was in error. Otherwise the AST
        // would appear to be perfectly correct.
        //
        switch (expectedTokenType) {

            case Token.EOF:

                // If we were expecting end of file at this point, then
                // there is a little extra work to do.
// tokenText = "<missing EOF>"; break; case v4Parser.TIME_LITERAL: // A time literal needs special handling so if anything wants to // try and use the value it should contain, then it needs to be // some valid default value. Here we use 1 second as a default // tokenText = "1s"; break; case v4Parser.IDENTIFIER: // A missing indentifer, which we adorn with text to make sure // it is obvious that it is inserted by this routine, in case // checking the class instance is impractical somewhere down stream. // tokenText = "<missing IDENTIFIER>"; break; // For anything else, we use the default methodology // default: // We create text that is some indication of what was missing // tokenText = "<missing " + tokenClass.forHumans() + ">"; break; } // We have created the raw information we need, so now we can // manufacture the token and just return it for inclusion in // the input stream. // t = new MissingCommonToken(expectedTokenType, tokenText); // Use the current/prior token to make up a position for the // manufactured one, one character after the end of the previous one. // t.setLine (current.getLine()); t.setCharPositionInLine (current.getStopIndex() + 1); t.setChannel (DEFAULT_TOKEN_CHANNEL); // Our manufactured token is complete so let's return it // return t; } // ---------------------------------------------------------------------- // Error recovery methods. // // In the general and most simple cases, ANTLR recognizes will do simple error // recovery pretty well, as it will detect things like a single missing token // or a single extraneous token. Howeer, its default in other cases is to // delete a single token and throw a RecognitionException. It will try to // resync the token stream, but if the source is 'very' erroneous, then all // it can really do is consume a token and see if that helps. // // In any particular rule, we have more or less an idea of context and in // these cases we take some specific actions for recovery that will resync // the input stream to somewhere that is more likely to allow us to carry // on parsing. // /** * Called to resync the input stream when we received an exception trying * to start the parse of a class member. This happens when the upcoming * stream is very erroneous, such as when someone has tried to place * soenthing in a class definition that has no business being there, or * has left out a critical keyword such as FUNCTION or VAR and we can * therefore just not predict what the code is trying to declare. * * As the class member will be completely out of context, the best thing * we can do is resync to the start of another, viable class member definition. * * @param ruleStart The position in the input stream of the first token that * spans the elements in error. * @param re The exception that the parser threw to get us heer, in case we * can use that information. * @return A Visage error node for the AST that spans the start and end of * all the tokesn that we had to discard in order to resync somewhere * sensible. 
     */
    protected VisageErroneous resyncClassMember(int ruleStart, RecognitionException re) {

        // First let's find out what the follow set is from this particular context
        //
        BitSet follow = computeContextSensitiveRuleFOLLOW();

        // Brace depth, for terminating consumption
        //
        int braceDepth = 0;

        for (;;) {

            int ttype = input.LA(1);
            boolean consumeNext = false;

            switch (ttype) {

                case Token.EOF:          // Reached end of file, we must stop
                case v4Parser.INIT:      // Reached an init definition
                case v4Parser.POSTINIT:  // Reached a postinit definition
                case v4Parser.OVERRIDE:  // Override definition

                // Any of the modifiers, we must assume to be a new class member,
                // even if they are not allowed here, as they may be just erroneously
                // specified.
                //
                case v4Parser.ABSTRACT:
                case v4Parser.BOUND:
                case v4Parser.DEFAULT:
                case v4Parser.PACKAGE:
                case v4Parser.PROTECTED:
                case v4Parser.PUBLIC:
                case v4Parser.PUBLIC_READ:
                case v4Parser.PUBLIC_INIT:

                // Variable declarations and member functions mean we are done consuming
                //
                case v4Parser.VAR:
                case v4Parser.DEF:
                case v4Parser.FUNCTION:

                    // We found a token that looks like it is the start of a new
                    // class member definition, or otherwise somewhere we should
                    // end consumption, so we can break this loop.
                    //
                    consumeNext = false;
                    break;

                case v4Parser.RBRACE:

                    // A right brace forces us to consider the brace depth.
                    // We assume that there was an opening brace for the class
                    // definition, and that any opening braces belong to the
                    // erroneous class member; hence we force loop exit if,
                    // after decrementing the brace level, we get to zero.
                    //
                    braceDepth--;
                    if (braceDepth == 0) {
                        consumeNext = false;
                    }
                    break;

                case v4Parser.LBRACE:

                    // An opening brace must belong to some construct within the erroneous
                    // class member, and so we count it, and consume it.
                    //
                    braceDepth++;
                    consumeNext = true;
                    break;

                default:

                    // The next token was not anything we wanted to sync to, so
                    // just consume it and move on.
                    //
                    consumeNext = true;
                    break;
            }

            // Now, are we consuming still, or are we done?
            //
            if (consumeNext) {
                input.consume();
            } else {
                break;
            }
        }

        // We have resynced to somewhere with a possibility to recover from,
        // so we need to create an erroneous node that covers everything
        // we skipped.
        //
        VisageErroneous errNode = F.at(ruleStart).Erroneous();
        endPos(errNode);

        // The caller will send the AST node to wherever it needs to be
        // in the build structure.
        //
        return errNode;
    }

    /**
     * Called to resync the input stream after we failed to make any sense of
     * what should have been a type, such as : String and so on.
     *
     * Performs customized resynchronization of the input stream and returns
     * either a missing type node or an erroneous node, depending on whether it
     * can make any sense of the error, or just has to resync to the follow set.
     *
     * @param ruleStart The position in the input stream of the first token that
     *                  spans the elements in error.
     * @param re The exception that the parser threw to get us here, in case we
     *           can use that information.
     * @return Either a Visage error node for the AST that spans the start and end of
     *         all the tokens that we had to discard in order to resync somewhere
     *         sensible, or a VisageMissingType
     */
    protected VisageType resyncType(int ruleStart, RecognitionException re) {

        VisageType errNode;

        // If we got an NVA here, then basically there was no typeName or typeArgList
        // and so on. We create a missing type if the rule has consumed no tokens,
        // as we know that the rule was supposed to match a type and there was nothing
        // there (and we could not manufacture anything, it seems).
        // If we have consumed some tokens, then we create an erroneous node,
        // resync, and move on.
        //
        if (re instanceof NoViableAltException) {

            // Now create an AST node that represents a missing type. The required entry
            // is of type Name, so we use an identifier name that cannot exist in
            // Visage, so that IDEs can detect it.
            //
            errNode = F.at(ruleStart).ErroneousType();
        } else {

            // Perform standard ANTLR recovery.
            //
            recover(input, re);
            errNode = F.at(ruleStart).ErroneousType();
        }

        // Calculate the AST span we have covered
        //
        endPos(errNode);

        return errNode;
    }

    /**
     * Use the current stacked follow set to work out the valid tokens that
     * can follow on from the current point in the parse, then recover by
     * eating tokens that are not a member of the follow set we compute.
     *
     * This method is used whenever we wish to force a sync, even though
     * the parser has not yet checked LA(1) for alt selection. This is useful
     * in situations where only a subset of tokens can begin a new construct
     * (such as the start of a new statement in a block) and we want to
     * proactively detect garbage so that the current rule does not exit
     * on an exception.
     *
     * We could override recover() to make this the default behavior, but that
     * is too much like using a sledgehammer to crack a nut. We want finer
     * grained control of the recovery and error mechanisms.
     */
    protected void syncToGoodToken() {

        // Compute the follow set that is in context wherever we are in the
        // rule chain/stack
        //
        BitSet follow = state.following[state._fsp]; //computeContextSensitiveRuleFOLLOW();

        syncToGoodToken(follow);
    }

    /**
     * Temporary workaround for a bug in ANTLR 3.1 follow set generation.
     */
    protected void syncToGoodClassToken() {

        BitSet follow = new BitSet();

        // Normal follow set
        //
        follow.add(v4Parser.INIT);
        follow.add(v4Parser.ABSTRACT);
        follow.add(v4Parser.BOUND);
        follow.add(v4Parser.DEF);
        follow.add(v4Parser.DEFAULT);
        follow.add(v4Parser.FUNCTION);
        follow.add(v4Parser.OVERRIDE);
        follow.add(v4Parser.PACKAGE);
        follow.add(v4Parser.POSTINIT);
        follow.add(v4Parser.PRIVATE);
        follow.add(v4Parser.PROTECTED);
        follow.add(v4Parser.PUBLIC);
        follow.add(v4Parser.PUBLIC_INIT);
        follow.add(v4Parser.PUBLIC_READ);
        follow.add(v4Parser.RBRACE);
        follow.add(v4Parser.SEMI);
        follow.add(v4Parser.STATIC);
        follow.add(v4Parser.VAR);

        // Additional elements that we want to halt on when syncing.
        //
        syncToGoodToken(follow);
    }

    protected void syncToGoodToken(BitSet follow) {

        int mark = -1;

        try {
            mark = input.mark();

            // Consume all tokens in the stream until we find a member of the follow
            // set, which means the next production should be guaranteed to be happy.
            //
            while (!follow.member(input.LA(1))) {
                if (input.LA(1) == Token.EOF) {

                    // Looks like we didn't find anything at all that can help us here,
                    // so we need to rewind to where we were and let normal error
                    // handling bail out.
                    //
                    input.rewind();
                    return;
                }
                input.consume();
            }
        } catch (Exception e) {

            // Just ignore any errors here; we will just let the recognizer
            // try to resync as normal - something must be very screwed up.
            //
        } finally {

            // Always release the mark we took
            //
            if (mark != -1) {
                input.release(mark);
            }
        }
    }
}
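// A minimal usage sketch (hypothetical; v4Parser/v4Lexer are the ANTLR
// generated classes, and the entry rule name is illustrative only):
//
//    v4Lexer lexer = new v4Lexer(new ANTLRStringStream(script));
//    v4Parser parser = new v4Parser(new CommonTokenStream(lexer));
//    parser.initialize(context);   // wire up the log, tree factory, options
//    parser.script();              // parse, reporting errors via the Log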