// AssignTokenTypesBehavior.java example
//
// Explorer
// FURCAS-master
/*
 [The "BSD licence"]
 Copyright (c) 2005-2008 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.tool;

import org.antlr.analysis.Label;
import org.antlr.misc.Utils;

import java.util.*;

import antlr.collections.impl.ASTArray;

/**
 * Holds the functionality moved out of the assign.types.g grammar file.
 *
 * <p>The {@code track*} and {@link #alias} methods record string literals,
 * token names and token/literal aliases in the maps below;
 * {@link #defineTokens} then gives every recorded entry a token type and
 * defines it in the root grammar.
 *
 * <p>{@code grammar}, {@code currentRuleName}, the {@code *Alias} tree
 * patterns and {@code initASTPatterns()} are not declared in this class;
 * presumably they are inherited from {@link AssignTokenTypesWalker}.
 */
public class AssignTokenTypesBehavior extends AssignTokenTypesWalker {
	/** Placeholder type stored in {@link #tokens} until a real type is assigned. */
	protected static final Integer UNASSIGNED = Utils.integer(-1);
	/** Placeholder type stored in {@link #stringLiterals} by {@link #trackString}. */
	protected static final Integer UNASSIGNED_IN_PARSER_RULE = Utils.integer(-2);

	/** String literal text -> token type, kept in insertion order. */
	protected Map<String,Integer> stringLiterals = new LinkedHashMap<String,Integer>();
	/** Token name -> token type, kept in insertion order. */
	protected Map<String,Integer> tokens = new LinkedHashMap<String,Integer>();
	/** Token name -> literal it is an alias for, kept in insertion order. */
	protected Map<String,String> aliases = new LinkedHashMap<String,String>();
	/** Literal -> token name aliasing it; the inverse of {@link #aliases}. */
	protected Map<String,String> aliasesReverseIndex = new HashMap<String,String>();

	/** Track actual lexer rule defs so we don't get repeated token defs in
	 *  generated lexer.  Only rules matching one of the
	 *  "TOKEN : literal ;" patterns are added (see {@link #trackTokenRule}).
	 */
	protected Set<String> tokenRuleDefs = new HashSet<String>();

	/**
	 * Prepares this object for processing grammar {@code g}: stores the
	 * grammar, clears the current rule name and builds the alias tree
	 * patterns the first time through.
	 *
	 * @param g the grammar about to be processed
	 */
	protected void init(Grammar g) {
		this.grammar = g;
		currentRuleName = null;
		if ( stringAlias==null ) {
			// only init once; can't statically init since we need astFactory
			initASTPatterns();
		}
	}

	/**
	 * Track string literals (could be in tokens{} section).
	 *
	 * <p>Reports an error and returns when a lexer grammar references a
	 * literal outside any rule.  Reports an error (but keeps going) when a
	 * root parser grammar references a literal that has no token type.
	 * Nothing is recorded for lexer grammars.  Otherwise a literal without a
	 * token type that is seen outside any rule, or inside a rule whose name
	 * starts with a lowercase letter, is recorded in {@link #stringLiterals}
	 * with {@link #UNASSIGNED_IN_PARSER_RULE}.
	 *
	 * @param t the AST node holding the literal
	 */
	protected void trackString(GrammarAST t) {
		// if lexer, don't allow aliasing in tokens section
		if ( currentRuleName==null && grammar.type==Grammar.LEXER ) {
			ErrorManager.grammarError(ErrorManager.MSG_CANNOT_ALIAS_TOKENS_IN_LEXER,
									  grammar,
									  t.token,
									  t.getText());
			return;
		}
		// in a plain parser grammar rule, cannot reference literals
		// (unless defined previously via tokenVocab option)
		// don't warn until we hit root grammar as may be defined there.
		if ( grammar.getGrammarIsRoot() &&
			 grammar.type==Grammar.PARSER &&
			 grammar.getTokenType(t.getText())==Label.INVALID )
		{
			ErrorManager.grammarError(ErrorManager.MSG_LITERAL_NOT_ASSOCIATED_WITH_LEXER_RULE,
									  grammar,
									  t.token,
									  t.getText());
		}
		// Don't record literals for lexers, they are things to match not tokens
		if ( grammar.type==Grammar.LEXER ) {
			return;
		}
		// otherwise add literal to token types if referenced from parser rule
		// or in the tokens{} section
		boolean outsideRuleOrInParserRule =
			currentRuleName==null ||
			Character.isLowerCase(currentRuleName.charAt(0));
		if ( outsideRuleOrInParserRule &&
			 grammar.getTokenType(t.getText())==Label.INVALID )
		{
			stringLiterals.put(t.getText(), UNASSIGNED_IN_PARSER_RULE);
		}
	}

	/**
	 * Records a token name in {@link #tokens} as {@link #UNASSIGNED}, unless
	 * the grammar already has a type for it or it was recorded before.
	 *
	 * @param t the AST node holding the token name
	 */
	protected void trackToken(GrammarAST t) {
		// imported token names might exist, only add if new
		// Might have ';'=4 in vocab import and SEMI=';'. Avoid
		// setting to UNASSIGNED if we have loaded ';'/SEMI
		if ( grammar.getTokenType(t.getText())==Label.INVALID &&
			 tokens.get(t.getText())==null )
		{
			tokens.put(t.getText(), UNASSIGNED);
		}
	}

	/**
	 * Records a lexer rule seen in a lexer or combined grammar.
	 *
	 * <p>Rules whose name does not start with an uppercase letter, and the
	 * artificial tokens rule, are ignored.  Every other rule name is added to
	 * the composite's lexer rules and, when the grammar has no type for it,
	 * to {@link #tokens} as {@link #UNASSIGNED}.  When the rule body matches
	 * one of the alias tree patterns the rule is remembered in
	 * {@link #tokenRuleDefs} and registered as an alias for its literal.
	 * Nothing happens for other grammar types.
	 *
	 * @param t        the AST node holding the rule name
	 * @param modifier not used by this implementation
	 * @param block    the rule's block subtree, compared against the alias patterns
	 */
	protected void trackTokenRule(GrammarAST t,
								  GrammarAST modifier,
								  GrammarAST block)
	{
		// imported token names might exist, only add if new
		if ( grammar.type!=Grammar.LEXER && grammar.type!=Grammar.COMBINED ) {
			return; // the original left this case as "else error"
		}
		if ( !Character.isUpperCase(t.getText().charAt(0)) ) {
			return;
		}
		if ( t.getText().equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) {
			// don't add Tokens rule
			return;
		}

		// track all lexer rules so we can look for token refs w/o
		// associated lexer rules.
		grammar.composite.lexerRules.add(t.getText());

		int existing = grammar.getTokenType(t.getText());
		if ( existing==Label.INVALID ) {
			tokens.put(t.getText(), UNASSIGNED);
		}
		// look for "<TOKEN> : <literal> ;" pattern
		// (can have optional action last)
		if ( block.hasSameTreeStructure(charAlias) ||
			 block.hasSameTreeStructure(stringAlias) ||
			 block.hasSameTreeStructure(charAlias2) ||
			 block.hasSameTreeStructure(stringAlias2) )
		{
			tokenRuleDefs.add(t.getText());
			// only call this rule an alias if combined or lexer; that is
			// already guaranteed by the grammar-type check at the top.
			alias(t, (GrammarAST)block.getFirstChild().getFirstChild());
		}
	}

	/**
	 * Registers token {@code t} as an alias for literal {@code s}.
	 *
	 * <p>If the literal is already aliased by the same token this is a no-op.
	 * If it is aliased by a different token the alias is dropped, and an
	 * error is reported unless both tokens are in {@link #tokenRuleDefs}.
	 * If the grammar already has a type for the literal, that type is stored
	 * for the token in {@link #tokens}.  If the token already aliases some
	 * literal, an error is reported and the alias is dropped.
	 *
	 * @param t the AST node holding the token name
	 * @param s the AST node holding the literal
	 */
	protected void alias(GrammarAST t, GrammarAST s) {
		String tokenID = t.getText();
		String literal = s.getText();
		String prevAliasLiteralID = aliasesReverseIndex.get(literal);
		if ( prevAliasLiteralID!=null ) { // we've seen this literal before
			if ( tokenID.equals(prevAliasLiteralID) ) {
				// duplicate but identical alias; might be tokens {A='a'} and
				// lexer rule A : 'a' ;  Is ok, just return
				return;
			}

			// give error unless both are rules (ok if one is in tokens section)
			boolean bothAreRules =
				tokenRuleDefs.contains(tokenID) &&
				tokenRuleDefs.contains(prevAliasLiteralID);
			if ( !bothAreRules ) {
				// don't allow alias if A='a' in tokens section and B : 'a'; is rule.
				// Allow if both are rules.  Will get DFA nondeterminism error later.
				ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_CONFLICT,
										  grammar,
										  t.token,
										  tokenID+"="+literal,
										  prevAliasLiteralID);
			}
			return; // don't do the alias
		}
		int existingLiteralType = grammar.getTokenType(literal);
		if ( existingLiteralType!=Label.INVALID ) {
			// we've seen this before from a tokenVocab most likely
			// don't assign a new token type; use existingLiteralType.
			tokens.put(tokenID, existingLiteralType);
		}
		String prevAliasTokenID = aliases.get(tokenID);
		if ( prevAliasTokenID!=null ) {
			ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_REASSIGNMENT,
									  grammar,
									  t.token,
									  tokenID+"="+literal,
									  prevAliasTokenID);
			return; // don't do the alias
		}
		aliases.put(tokenID, literal);
		aliasesReverseIndex.put(literal, tokenID);
	}

	/**
	 * Assigns types to everything recorded so far and defines it in
	 * {@code root}.  The order matters: token names first, then aliases
	 * (which copy token-name types onto literals), then the remaining
	 * literals, and finally the definitions in the grammar.
	 *
	 * @param root the grammar that hands out new token types and receives
	 *             the definitions
	 */
	protected void defineTokens(Grammar root) {
		assignTokenIDTypes(root);

		aliasTokenIDsAndLiterals(root);

		assignStringTypes(root);

		defineTokenNamesAndLiteralsInGrammar(root);
	}

	/**
	 * Walks {@link #stringLiterals} and gives a new token type to every
	 * literal whose current type is below {@code Label.MIN_TOKEN_TYPE},
	 * also defining a lexer rule for that literal in {@code root}.
	 *
	 * @param root the grammar that supplies new token types
	 */
	protected void assignStringTypes(Grammar root) {
		// walk string literals assigning types to unassigned ones
		for (Map.Entry<String,Integer> entry : stringLiterals.entrySet()) {
			int oldType = entry.getValue().intValue();
			if ( oldType<Label.MIN_TOKEN_TYPE ) {
				Integer typeI = Utils.integer(root.getNewTokenType());
				entry.setValue(typeI);
				// if string referenced in combined grammar parser rule,
				// automatically define in the generated lexer
				root.defineLexerRuleForStringLiteral(entry.getKey(), typeI.intValue());
			}
		}
	}

	/**
	 * For every alias whose literal starts with a single quote and was
	 * recorded in {@link #stringLiterals}, gives the literal the same type
	 * as its token name and, unless the token is in {@link #tokenRuleDefs},
	 * defines an aliased lexer rule in {@code root}.  Does nothing when
	 * {@code root} is a lexer grammar.
	 *
	 * @param root the grammar that receives the aliased lexer rules
	 */
	protected void aliasTokenIDsAndLiterals(Grammar root) {
		if ( root.type==Grammar.LEXER ) {
			return; // strings/chars are never token types in LEXER
		}
		// walk aliases if any and assign types to aliased literals if literal
		// was referenced
		for (Map.Entry<String,String> entry : aliases.entrySet()) {
			String tokenID = entry.getKey();
			String literal = entry.getValue();
			if ( literal.charAt(0)=='\'' && stringLiterals.get(literal)!=null ) {
				Integer typeI = tokens.get(tokenID);
				stringLiterals.put(literal, typeI);
				// an alias still means you need a lexer rule for it
				if ( !tokenRuleDefs.contains(tokenID) ) {
					// NOTE(review): typeI is null (and this throws) if tokenID
					// was never put into tokens -- confirm that cannot happen.
					root.defineLexerRuleForAliasedStringLiteral(tokenID, literal, typeI.intValue());
				}
			}
		}
	}

	/**
	 * Walks {@link #tokens} and replaces every {@link #UNASSIGNED} entry
	 * with a new token type obtained from {@code root}.
	 *
	 * @param root the grammar that supplies new token types
	 */
	protected void assignTokenIDTypes(Grammar root) {
		// walk token names, assigning values if unassigned
		for (Map.Entry<String,Integer> entry : tokens.entrySet()) {
			// compare by value: identity (==) on boxed Integers only works
			// while every unassigned entry is the very same object
			if ( UNASSIGNED.equals(entry.getValue()) ) {
				entry.setValue(Utils.integer(root.getNewTokenType()));
			}
		}
	}

	/**
	 * Defines every entry of {@link #tokens}, then every entry of
	 * {@link #stringLiterals}, in {@code root} with its stored type.
	 *
	 * @param root the grammar that receives the token definitions
	 */
	protected void defineTokenNamesAndLiteralsInGrammar(Grammar root) {
		for (Map.Entry<String,Integer> entry : tokens.entrySet()) {
			root.defineToken(entry.getKey(), entry.getValue().intValue());
		}
		for (Map.Entry<String,Integer> entry : stringLiterals.entrySet()) {
			root.defineToken(entry.getKey(), entry.getValue().intValue());
		}
	}

}