/* * Copyright 2008-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. */ package org.visage.tools.antlr; import com.sun.tools.mjavac.util.Convert; import com.sun.tools.mjavac.util.Log; import org.visage.tools.util.MsgSym; import org.antlr.runtime.*; /** * Base class for ANTLR generated parsers * * @author Robert Field * @author Zhiqun Chen */ public abstract class AbstractGeneratedLexerV4 extends org.antlr.runtime.Lexer { /** * The log to be used for error diagnostics. */ protected Log log; /** * Initial value of the brace quote tracker provides a door stop for leaving * quotes altogether (not yet processing any {} expressions). */ private final BraceQuoteTracker NULL_BQT = new BraceQuoteTracker(null, '\'', false); /** * Tracks the level of nested {} that the lexer is currently processing * within. */ private BraceQuoteTracker quoteStack = NULL_BQT; // quote context -- static final int CUR_QUOTE_CTX = 0; // 0 = use current quote context static final int SNG_QUOTE_CTX = 1; // 1 = single quote quote context static final int DBL_QUOTE_CTX = 2; // 2 = double quote quote context // Recorded start of string with embedded expression // protected int eStringStart = 0; /** * Construct a new Visage lexer with no pre-known input stream */ protected AbstractGeneratedLexerV4() { } /** * Construct a new Visage lexer installing the character stream at * the same time. * * @param input The character stream that the lexer will scan, which should already * be opened and initialized. */ protected AbstractGeneratedLexerV4(CharStream input) { super(input); } /** * Construct a new Visage lexer installing the character stream and * the shared state (used if there is more than one lexer, which is currently * not used by the Visage compiler) at the same time. * * @param input The character stream that the lexer will scan, which should already * be opened and initialized. * @param state The lexer state object that was created by a previously created lexer * */ protected AbstractGeneratedLexerV4(CharStream input, RecognizerSharedState state) { super(input, state); } /** * Used in lexer rule actions to process the characters scanned to match a literal string. * * Converts the literal string by removing bounding delimiters such as "xxx" 'xxx' "xxx{ * and so on, to yield xxx. Then sets the converted text to be the text associated * with the lexer token that is currently being processed (and from whence this * method is called. */ void processString() { setText(StringLiteralProcessor.convert(log, getCharIndex(), getText())); } /** * Used in lexer rule actions to create a literal string conforming to * the format string of Visage compound string: %pattern -> internal representation. */ void processFormatString() { // Add quote characters and adjust the index to invoke StringLiteralProcessor.convert(). // StringBuilder sb = new StringBuilder(); sb.append('"').append(getText()).append('"'); setText(StringLiteralProcessor.convert(log, getCharIndex() + 1, sb.toString())); } /** * Called from lexer rule actions to convert the external form of a string literal * translation key into the internal form. */ void processTranslationKey() { String text = getText().substring(2); // remove '##' if (text.length() > 0) { text = StringLiteralProcessor.convert(log, getCharIndex(), text); } setText(text); } /** * Called by lexer rule actions when the lexer detects a '{' within a literal string * and has worked out whether a format '%xxx' will follow. * @param quote The type of literal string quite " or ' * @param nextIsPercent Whether there is a following %format string */ protected void enterBrace(int quote, boolean nextIsPercent) { quoteStack.enterBrace(quote, nextIsPercent); } /** * Called by lexer rule actions when the lexer detects a ' or " that * closes a literal string (which means '{' no longer indicate * string embedded expressions. */ protected void leaveQuote() { quoteStack.leaveQuote(); } /** * Used in the lexer as a gated semantic predicate to indicate whether * a right brace '}' is currently expcted to indicate closure of * an embedded string literal expression or not. * @param quote The type of quote ' or " that we are looking to see if the } is embedded within * @return true indicates that the right brace should be seen as ending an embedded * expression within the quoted literal string type indicated by the quote * parameter. false indicates that the brace is just a brace, for say block * closure. */ protected boolean rightBraceLikeQuote(int quote) { return quoteStack.rightBraceLikeQuote(quote); } /** * Called by the lexer rules to indicate that we have found a '}' within * a literal string and are therefore exiting one level of * nested expression depth. */ protected void leaveBrace() { quoteStack.leaveBrace(); } /** * Used as a gated semantic predicate by the lexer to decide if the * '%' it is about to scan is the introducer to the format string * for an embdedded string expression or not. * @return true '%' is starting a string format specficaier * false '%' is just a '%' */ protected boolean percentIsFormat() { return quoteStack.percentIsFormat(); } /** * Called by the lexer rules after a '%' has been recognized * as a format string specifier and therfore any more '%' are not * indicating a format string. */ protected void resetPercentIsFormat() { quoteStack.resetPercentIsFormat(); } /** * Returns and indicator of what level of nested expressions * the lexer is currently within. * @return 0 - start ste, no expressions are active. >1 indicates * the level of nesting that the lexer is currently processing. */ protected int getLexicalState() { return quoteStack.getLexicalState(); } /** * Overrides the standard ANTLR 3.1 lexer error message generator * to provide a message that will make more sense to Visage programmers. * @param e The exception that the lexer raised because it could not decode * what to do next. * @param tokenNames The ANTLR supplied list of token names as used in the lexer. * @return The string that shuold be used as the error message by the Visage compiler */ @Override public String getErrorMessage(RecognitionException e, String[] tokenNames) { StringBuffer mb = new StringBuffer(); // No viable alt means that somehow the lexer rule or the // lexer itself found a character that cannot match any // decisions points. In theory, as of the v4 lexer, this cannot // happen unless something went wrong in the gramamr analysis. // However, because there are predicates used for embedded string // expressions, and this can play with the analysis, we cater for it // anyway. // if (e instanceof NoViableAltException) { if (e.c == Token.EOF) { // Changes in the v4 lexer mean that it shoudl be virtually impossible // to trigger this error. However it is perhaps possible if the lexer // predicts a token, tries to match it and discovers EOF because this // is a file produced on Windows and has no terminating \n. Hence // we look for this EOF sceanrio and report it nicely. // mb.append("Sorry, I scanned to the end of your script from around line " + e.line + " but could not see how to process it. "); mb.append("This can happen if you forget a closing delimiter such as ''' '\"' or '{'"); } else { // We managed to predict some lexer token that once we started // down the path, turned out not to be what we thought it was. // With the v4 lexer, this shoudl not be happening, but this message // is used as belt and braces protection. // mb.append("Sorry, but the character " + getCharErrorDisplay(e.c)); mb.append("is not allowed in a Visage. Well at least, not here."); } } else { // Any other kind of exception is something we cannot really deal with // here. So we gather ANTLR's assessment of the error state and // use that. // mb.append(super.getErrorMessage(e, tokenNames)); } return mb.toString(); } /** * Override for the ANTLR 3.x message display routine so that we can log * errors within the Visage compiler infrastructure. * * @param tokenNames ANTLR provided array of the lexer token names * @param e The excpetion that was raised by the lexer, for further action. */ @Override public void displayRecognitionError(String[] tokenNames, RecognitionException e) { // Find out how we wish to describe this expcetion to the script author/user // String msg = getErrorMessage(e, tokenNames); // Record the error for later output or capture by development tools // log.error(getCharIndex(), MsgSym.MESSAGE_VISAGE_GENERALERROR, msg); } protected boolean checkIntLiteralRange(String text, int pos, int radix, boolean negative) { // Because Long.MIN_VALUE < -Long.MAX_VALUE we need to use the actual negative when present // String checkText = negative? "-" + text : text; // Correct start position for error display // pos = pos - checkText.length(); try { Convert.string2long(checkText, radix); } catch (Exception e) { // Number form was too outrageous even for the converter // log.error(pos, MsgSym.MESSAGE_VISAGE_LITERAL_OUT_OF_RANGE, "Long", checkText); return false; } return true; } protected boolean checkColorString(String text, int pos) { // valid strings: #rgb, #rrggbb, #rgb|a, or #rrggbb|aa int total = text.length(); int length = 0; int dividerLoc = -1; boolean valid = true; for (int i = 1; i < total; i++) { if (text.charAt(i) == '|') { if (dividerLoc != -1) valid = false; dividerLoc = i; } else { length++; } } valid &= (length == 3 || length == 6) && dividerLoc == -1 || length == 4 && dividerLoc == 4 || length == 8 && dividerLoc == 7; if (!valid) { log.error(pos, MsgSym.MESSAGE_VISAGE_COLOR_WRONG_FORMAT, text); } return valid; } /** * Tracker for the quotes and braces used to define embedded expressions within literal strings * such as "He{"l{"l"}o"} world". */ protected class BraceQuoteTracker { /** * How many levels deep is this instance, within nests such as {{{{{{}}}}}} */ private int braceDepth; /** * Which quote is this instance tracking: ' or " */ private char quote; /** * Indicates whether, at this tracking level and in the current * lexing state, a following '%' should be seen as introducing a * string formatting specification or just as a normal '%' character. */ private boolean percentIsFormat; /** * Tracks the tracker instance prior to this instance of the tracker. */ private BraceQuoteTracker next; /** * Constructs a new instance of the tracker, and stores a reference * to the provided current instance on the tracker stack. * * @param prev * @param quote * @param percentIsFormat */ private BraceQuoteTracker(BraceQuoteTracker prev, char quote, boolean percentIsFormat) { this.quote = quote; this.percentIsFormat = percentIsFormat; this.braceDepth = 1; this.next = prev; } /** * Causes a new instance of the tracker class to be created then placed at the * top of the tracking stack, with a reference to the current tracking instance * @param quote Type of quoteed string " ' that we are tracking within * @param percentIsFormat Whether we should expect a format specification or not */ void enterBrace(int quote, boolean percentIsFormat) { if (quote == 0) { // exisiting string expression or non string expression if (quoteStack != NULL_BQT) { ++quoteStack.braceDepth; quoteStack.percentIsFormat = percentIsFormat; } } else { quoteStack = new BraceQuoteTracker(quoteStack, (char) quote, percentIsFormat); // push } } /** * Called to indicate that we are leaving teh current nested brace level * and find out what type of quoted string we are popping back in to. * * @return The type of quite " or ' that we are re-entering. */ char leaveBrace() { if (quoteStack != NULL_BQT && --quoteStack.braceDepth == 0) { return quoteStack.quote; } return 0; } /** * Retuns true if the right brace '}' is currently seen as ending an embedded expression. * @param quote Teh type of quoted literal string that the lexer is currently traversing. * @return true - use } to end an expression. false - we were not looking to end an expression. */ boolean rightBraceLikeQuote(int quote) { return quoteStack != NULL_BQT && quoteStack.braceDepth == 1 && (quote == 0 || quoteStack.quote == (char) quote); } /** * Called to indicate that the lexer has matched the closing quote of a literal * string. */ void leaveQuote() { assert (quoteStack != NULL_BQT && quoteStack.braceDepth == 0); quoteStack = quoteStack.next; // pop } /** * Called to indicate if the lexer shoudl see '%' as teh start of a * format specification, or not. * * @return true - the upcoming '%' is a format. false, the upcoming '%' is not a format. */ boolean percentIsFormat() { return quoteStack != NULL_BQT && quoteStack.percentIsFormat; } /** * Called by the lexer to indicate that it knows that any upcoming '%' cannot * possibly be the introducer for a format specification. */ void resetPercentIsFormat() { quoteStack.percentIsFormat = false; } /** * Calleld to find out if the lexer is currently scanning with brace quotes or not. * @return true if the lexer is traversing an embedded brace delimited expression and; * false if it is not. */ boolean inBraceQuote() { return quoteStack != NULL_BQT; } /** * Encode the lexical state into an integer, to permit incremental lexing in IDEs that support it * @return Level of emdedded */ int getLexicalState() { // This is a hack -- state is not invertible yet return (quoteStack == NULL_BQT) ? 0 : quoteStack.braceDepth; } } }