/*
* Copyright 2008-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package org.visage.tools.antlr;
import com.sun.tools.mjavac.util.Convert;
import com.sun.tools.mjavac.util.Log;
import org.visage.tools.util.MsgSym;
import org.antlr.runtime.*;
/**
* Base class for ANTLR generated parsers
*
* @author Robert Field
* @author Zhiqun Chen
*/
public abstract class AbstractGeneratedLexerV4 extends org.antlr.runtime.Lexer {
/**
* The log to be used for error diagnostics.
*/
protected Log log;
/**
* Initial value of the brace quote tracker provides a door stop for leaving
* quotes altogether (not yet processing any {} expressions).
*/
private final BraceQuoteTracker NULL_BQT = new BraceQuoteTracker(null, '\'', false);
/**
* Tracks the level of nested {} that the lexer is currently processing
* within.
*/
private BraceQuoteTracker quoteStack = NULL_BQT;
// quote context --
static final int CUR_QUOTE_CTX = 0; // 0 = use current quote context
static final int SNG_QUOTE_CTX = 1; // 1 = single quote quote context
static final int DBL_QUOTE_CTX = 2; // 2 = double quote quote context
// Recorded start of string with embedded expression
//
protected int eStringStart = 0;
/**
* Construct a new Visage lexer with no pre-known input stream
*/
protected AbstractGeneratedLexerV4() {
}
/**
* Construct a new Visage lexer installing the character stream at
* the same time.
*
* @param input The character stream that the lexer will scan, which should already
* be opened and initialized.
*/
protected AbstractGeneratedLexerV4(CharStream input) {
super(input);
}
/**
* Construct a new Visage lexer installing the character stream and
* the shared state (used if there is more than one lexer, which is currently
* not used by the Visage compiler) at the same time.
*
* @param input The character stream that the lexer will scan, which should already
* be opened and initialized.
* @param state The lexer state object that was created by a previously created lexer
*
*/
protected AbstractGeneratedLexerV4(CharStream input, RecognizerSharedState state) {
super(input, state);
}
/**
* Used in lexer rule actions to process the characters scanned to match a literal string.
*
* Converts the literal string by removing bounding delimiters such as "xxx" 'xxx' "xxx{
* and so on, to yield xxx. Then sets the converted text to be the text associated
* with the lexer token that is currently being processed (and from whence this
* method is called.
*/
void processString() {
setText(StringLiteralProcessor.convert(log, getCharIndex(), getText()));
}
/**
* Used in lexer rule actions to create a literal string conforming to
* the format string of Visage compound string: %pattern -> internal representation.
*/
void processFormatString() {
// Add quote characters and adjust the index to invoke StringLiteralProcessor.convert().
//
StringBuilder sb = new StringBuilder();
sb.append('"').append(getText()).append('"');
setText(StringLiteralProcessor.convert(log, getCharIndex() + 1, sb.toString()));
}
/**
* Called from lexer rule actions to convert the external form of a string literal
* translation key into the internal form.
*/
void processTranslationKey() {
String text = getText().substring(2); // remove '##'
if (text.length() > 0) {
text = StringLiteralProcessor.convert(log, getCharIndex(), text);
}
setText(text);
}
/**
* Called by lexer rule actions when the lexer detects a '{' within a literal string
* and has worked out whether a format '%xxx' will follow.
* @param quote The type of literal string quite " or '
* @param nextIsPercent Whether there is a following %format string
*/
protected void enterBrace(int quote, boolean nextIsPercent) {
quoteStack.enterBrace(quote, nextIsPercent);
}
/**
* Called by lexer rule actions when the lexer detects a ' or " that
* closes a literal string (which means '{' no longer indicate
* string embedded expressions.
*/
protected void leaveQuote() {
quoteStack.leaveQuote();
}
/**
* Used in the lexer as a gated semantic predicate to indicate whether
* a right brace '}' is currently expcted to indicate closure of
* an embedded string literal expression or not.
* @param quote The type of quote ' or " that we are looking to see if the } is embedded within
* @return true indicates that the right brace should be seen as ending an embedded
* expression within the quoted literal string type indicated by the quote
* parameter. false indicates that the brace is just a brace, for say block
* closure.
*/
protected boolean rightBraceLikeQuote(int quote) {
return quoteStack.rightBraceLikeQuote(quote);
}
/**
* Called by the lexer rules to indicate that we have found a '}' within
* a literal string and are therefore exiting one level of
* nested expression depth.
*/
protected void leaveBrace() {
quoteStack.leaveBrace();
}
/**
* Used as a gated semantic predicate by the lexer to decide if the
* '%' it is about to scan is the introducer to the format string
* for an embdedded string expression or not.
* @return true '%' is starting a string format specficaier
* false '%' is just a '%'
*/
protected boolean percentIsFormat() {
return quoteStack.percentIsFormat();
}
/**
* Called by the lexer rules after a '%' has been recognized
* as a format string specifier and therfore any more '%' are not
* indicating a format string.
*/
protected void resetPercentIsFormat() {
quoteStack.resetPercentIsFormat();
}
/**
* Returns and indicator of what level of nested expressions
* the lexer is currently within.
* @return 0 - start ste, no expressions are active. >1 indicates
* the level of nesting that the lexer is currently processing.
*/
protected int getLexicalState() {
return quoteStack.getLexicalState();
}
/**
* Overrides the standard ANTLR 3.1 lexer error message generator
* to provide a message that will make more sense to Visage programmers.
* @param e The exception that the lexer raised because it could not decode
* what to do next.
* @param tokenNames The ANTLR supplied list of token names as used in the lexer.
* @return The string that shuold be used as the error message by the Visage compiler
*/
@Override
public String getErrorMessage(RecognitionException e, String[] tokenNames) {
StringBuffer mb = new StringBuffer();
// No viable alt means that somehow the lexer rule or the
// lexer itself found a character that cannot match any
// decisions points. In theory, as of the v4 lexer, this cannot
// happen unless something went wrong in the gramamr analysis.
// However, because there are predicates used for embedded string
// expressions, and this can play with the analysis, we cater for it
// anyway.
//
if (e instanceof NoViableAltException) {
if (e.c == Token.EOF) {
// Changes in the v4 lexer mean that it shoudl be virtually impossible
// to trigger this error. However it is perhaps possible if the lexer
// predicts a token, tries to match it and discovers EOF because this
// is a file produced on Windows and has no terminating \n. Hence
// we look for this EOF sceanrio and report it nicely.
//
mb.append("Sorry, I scanned to the end of your script from around line " + e.line + " but could not see how to process it. ");
mb.append("This can happen if you forget a closing delimiter such as ''' '\"' or '{'");
} else {
// We managed to predict some lexer token that once we started
// down the path, turned out not to be what we thought it was.
// With the v4 lexer, this shoudl not be happening, but this message
// is used as belt and braces protection.
//
mb.append("Sorry, but the character " + getCharErrorDisplay(e.c));
mb.append("is not allowed in a Visage. Well at least, not here.");
}
} else {
// Any other kind of exception is something we cannot really deal with
// here. So we gather ANTLR's assessment of the error state and
// use that.
//
mb.append(super.getErrorMessage(e, tokenNames));
}
return mb.toString();
}
/**
* Override for the ANTLR 3.x message display routine so that we can log
* errors within the Visage compiler infrastructure.
*
* @param tokenNames ANTLR provided array of the lexer token names
* @param e The excpetion that was raised by the lexer, for further action.
*/
@Override
public void displayRecognitionError(String[] tokenNames, RecognitionException e) {
// Find out how we wish to describe this expcetion to the script author/user
//
String msg = getErrorMessage(e, tokenNames);
// Record the error for later output or capture by development tools
//
log.error(getCharIndex(), MsgSym.MESSAGE_VISAGE_GENERALERROR, msg);
}
protected boolean checkIntLiteralRange(String text, int pos, int radix, boolean negative) {
// Because Long.MIN_VALUE < -Long.MAX_VALUE we need to use the actual negative when present
//
String checkText = negative? "-" + text : text;
// Correct start position for error display
//
pos = pos - checkText.length();
try {
Convert.string2long(checkText, radix);
} catch (Exception e) {
// Number form was too outrageous even for the converter
//
log.error(pos, MsgSym.MESSAGE_VISAGE_LITERAL_OUT_OF_RANGE, "Long", checkText);
return false;
}
return true;
}
protected boolean checkColorString(String text, int pos) {
// valid strings: #rgb, #rrggbb, #rgb|a, or #rrggbb|aa
int total = text.length();
int length = 0;
int dividerLoc = -1;
boolean valid = true;
for (int i = 1; i < total; i++) {
if (text.charAt(i) == '|') {
if (dividerLoc != -1) valid = false;
dividerLoc = i;
} else {
length++;
}
}
valid &= (length == 3 || length == 6) && dividerLoc == -1
|| length == 4 && dividerLoc == 4
|| length == 8 && dividerLoc == 7;
if (!valid) {
log.error(pos, MsgSym.MESSAGE_VISAGE_COLOR_WRONG_FORMAT, text);
}
return valid;
}
/**
* Tracker for the quotes and braces used to define embedded expressions within literal strings
* such as "He{"l{"l"}o"} world".
*/
protected class BraceQuoteTracker {
/**
* How many levels deep is this instance, within nests such as {{{{{{}}}}}}
*/
private int braceDepth;
/**
* Which quote is this instance tracking: ' or "
*/
private char quote;
/**
* Indicates whether, at this tracking level and in the current
* lexing state, a following '%' should be seen as introducing a
* string formatting specification or just as a normal '%' character.
*/
private boolean percentIsFormat;
/**
* Tracks the tracker instance prior to this instance of the tracker.
*/
private BraceQuoteTracker next;
/**
* Constructs a new instance of the tracker, and stores a reference
* to the provided current instance on the tracker stack.
*
* @param prev
* @param quote
* @param percentIsFormat
*/
private BraceQuoteTracker(BraceQuoteTracker prev, char quote, boolean percentIsFormat) {
this.quote = quote;
this.percentIsFormat = percentIsFormat;
this.braceDepth = 1;
this.next = prev;
}
/**
* Causes a new instance of the tracker class to be created then placed at the
* top of the tracking stack, with a reference to the current tracking instance
* @param quote Type of quoteed string " ' that we are tracking within
* @param percentIsFormat Whether we should expect a format specification or not
*/
void enterBrace(int quote, boolean percentIsFormat) {
if (quote == 0) { // exisiting string expression or non string expression
if (quoteStack != NULL_BQT) {
++quoteStack.braceDepth;
quoteStack.percentIsFormat = percentIsFormat;
}
}
else {
quoteStack = new BraceQuoteTracker(quoteStack, (char) quote, percentIsFormat); // push
}
}
/**
* Called to indicate that we are leaving teh current nested brace level
* and find out what type of quoted string we are popping back in to.
*
* @return The type of quite " or ' that we are re-entering.
*/
char leaveBrace() {
if (quoteStack != NULL_BQT && --quoteStack.braceDepth == 0) {
return quoteStack.quote;
}
return 0;
}
/**
* Retuns true if the right brace '}' is currently seen as ending an embedded expression.
* @param quote Teh type of quoted literal string that the lexer is currently traversing.
* @return true - use } to end an expression. false - we were not looking to end an expression.
*/
boolean rightBraceLikeQuote(int quote) {
return quoteStack != NULL_BQT && quoteStack.braceDepth == 1 && (quote == 0 || quoteStack.quote == (char) quote);
}
/**
* Called to indicate that the lexer has matched the closing quote of a literal
* string.
*/
void leaveQuote() {
assert (quoteStack != NULL_BQT && quoteStack.braceDepth == 0);
quoteStack = quoteStack.next; // pop
}
/**
* Called to indicate if the lexer shoudl see '%' as teh start of a
* format specification, or not.
*
* @return true - the upcoming '%' is a format. false, the upcoming '%' is not a format.
*/
boolean percentIsFormat() {
return quoteStack != NULL_BQT && quoteStack.percentIsFormat;
}
/**
* Called by the lexer to indicate that it knows that any upcoming '%' cannot
* possibly be the introducer for a format specification.
*/
void resetPercentIsFormat() {
quoteStack.percentIsFormat = false;
}
/**
* Calleld to find out if the lexer is currently scanning with brace quotes or not.
* @return true if the lexer is traversing an embedded brace delimited expression and;
* false if it is not.
*/
boolean inBraceQuote() {
return quoteStack != NULL_BQT;
}
/**
* Encode the lexical state into an integer, to permit incremental lexing in IDEs that support it
* @return Level of emdedded
*/
int getLexicalState() {
// This is a hack -- state is not invertible yet
return (quoteStack == NULL_BQT) ? 0 : quoteStack.braceDepth;
}
}
}