/******************************************************************************* * Copyright (c) 2008 SAP * see https://research.qkal.sap.corp/mediawiki/index.php/CoMONET * * Date: $Date: 2009-10-14 14:21:35 +0200 (Mi, 14 Okt 2009) $ * @version $Revision: 8347 $ * @author: $Author: c5106462 $ *******************************************************************************/ package com.sap.furcas.parsergenerator.tcs.t2m.grammar; import java.util.Collection; import java.util.List; import java.util.Map; import com.sap.furcas.metamodel.FURCAS.TCS.ClassTemplate; import com.sap.furcas.metamodel.FURCAS.TCS.ConcreteSyntax; import com.sap.furcas.metamodel.FURCAS.TCS.EnumerationTemplate; import com.sap.furcas.metamodel.FURCAS.TCS.FunctionTemplate; import com.sap.furcas.metamodel.FURCAS.TCS.OperatorTemplate; import com.sap.furcas.metamodel.FURCAS.TCS.PrimitiveTemplate; import com.sap.furcas.metamodel.FURCAS.TCS.Symbol; import com.sap.furcas.metamodel.FURCAS.TCS.Template; import com.sap.furcas.metamodel.FURCAS.TCS.Token; import com.sap.furcas.parsergenerator.TCSSyntaxContainerBean; import com.sap.furcas.parsergenerator.tcs.t2m.grammar.rules.SymbolProductionRule; import com.sap.furcas.parsergenerator.tcs.t2m.validation.SyntaxDefinitionValidation; import com.sap.furcas.parsergenerator.util.LexerStringMemberExtractor; import com.sap.furcas.parsergenerator.util.LexerStringMemberExtractor.InvalidLexerStringException; import com.sap.furcas.runtime.common.exceptions.MetaModelLookupException; import com.sap.furcas.runtime.common.exceptions.SyntaxElementException; import com.sap.furcas.runtime.common.interfaces.IMetaModelLookup; import com.sap.furcas.runtime.parser.TextLocation; import com.sap.furcas.runtime.parser.exceptions.SyntaxParsingException; import com.sap.furcas.runtime.parser.impl.ObservableInjectingParser; import com.sap.furcas.runtime.tcs.MetaModelElementResolutionHelper; import com.sap.furcas.runtime.tcs.SyntaxLookup; import com.sap.furcas.runtime.tcs.TemplateNamingHelper; import com.sap.furcas.utils.StringUtil; /** * orchestrates the creation of grammar elements. This class is a facade to several * helper classes dealing with specific syntax elements and their mappings to ANTRL3 grammar rules. * For elements of a TCS Syntax definition model (of a DSL Syntax), this Mapper * helps writing the ANTLR3 grammar for each template type. * * @author C5107456 */ public class ANTLRGrammarGenerator { /** The writer. */ private ANTLR3GrammarWriter writer; // elements protected to make unit testing a bit easier /** The alternative handler. */ private AlternativeHandler alternativeHandler; /** The block handler. */ private BlockTypeHandler blockHandler; /** The con el handler. */ private ConditionalElementHandler<?> conElHandler; /** The property handler. */ private PropertyTypeHandler<?> propertyHandler; /** The token handler. */ private TokenHandler tokenHandler; /** The prim temp handler. */ private PrimitiveTemplateHandler primTempHandler; /** The function temp handler. */ private FunctionTemplateHandler<?> funcTempHandler; /** The class temp handler. */ private ClassTemplateHandler<?> classTempHandler; /** * */ private EnumerationTemplateHandler<?> enumTempHandler; /** The operator template handler. */ private OperatorTemplateHandler<?> operatorTemplateHandler; /** The rule body factory. */ private RuleBodyBufferFactory ruleBodyFactory; /** The operator handler. */ private OperatorHandler operatorHandler; /** The syntax lookup. */ private SyntaxLookup syntaxLookup; /** The syntax. */ private ConcreteSyntax syntax; private Map<Object, TextLocation> locationMap; private SemanticErrorBucket errorBucket; private InjectorActionsHandler<?> actionsHandler; /** * Creates an ANTLR3 grammar for the given syntax and uses the writer to * write it as a String. * * @param grammarWriter * @param lookup * @param bean * @param qualifiedNamesSeparator */ private <T> void init(ANTLR3GrammarWriter grammarWriter, IMetaModelLookup<T> metaLookup, TCSSyntaxContainerBean syntaxbean) { // collects all errors that happen during grammar generation errorBucket = new SemanticErrorBucket(); MetaModelElementResolutionHelper<T> resolutionHelper = new MetaModelElementResolutionHelper<T>(metaLookup); this.writer = grammarWriter; this.syntax = syntaxbean.getSyntax(); this.syntaxLookup = new SyntaxLookup(syntax, resolutionHelper); TemplateNamingHelper<T> namingHelper = new TemplateNamingHelper<T>(resolutionHelper); this.locationMap = syntaxbean.getElementToLocationMap(); SyntaxElementHandlerConfigurationBean<T> handlerConfig = new SyntaxElementHandlerConfigurationBean<T>(writer, operatorHandler, metaLookup, syntaxLookup, namingHelper, errorBucket, resolutionHelper); // rule producing handlers this.tokenHandler = new TokenHandler(handlerConfig); this.operatorTemplateHandler = new OperatorTemplateHandler<T>(handlerConfig); this.operatorHandler = new OperatorHandler(handlerConfig); this.primTempHandler = new PrimitiveTemplateHandler(handlerConfig); this.funcTempHandler = new FunctionTemplateHandler<T>(handlerConfig); // ruleBody producing Handlers this.alternativeHandler = new AlternativeHandler(); this.blockHandler = new BlockTypeHandler(); this.propertyHandler = new PropertyTypeHandler<T>(handlerConfig); this.conElHandler = new ConditionalElementHandler<T>(handlerConfig); this.classTempHandler = new ClassTemplateHandler<T>(operatorHandler, handlerConfig); this.enumTempHandler = new EnumerationTemplateHandler<T>(handlerConfig); this.actionsHandler = new InjectorActionsHandler<T>(handlerConfig); // factory depending on rulebody producing handlers this.ruleBodyFactory = new RuleBodyBufferFactory(alternativeHandler, blockHandler, conElHandler, syntaxLookup, propertyHandler, namingHelper, actionsHandler, errorBucket); } /** * traverses the syntax definition elements and creates grammar elements by * delegating creation actions to Handlers. The result is stored in the grammar Writer * that was passed in the constructor, call writer.getOuput to write get the ANTLR stream. * @return * @throws MetaModelLookupException * * @throws SyntaxParsingException * the syntax parsing exception * @throws MetaModelLookupException * the meta model lookup exception */ public synchronized GenerationReport generateGrammar( ANTLR3GrammarWriter writer2, IMetaModelLookup<?> metaLookup, TCSSyntaxContainerBean syntaxbean, SyntaxDefinitionValidation validationRules, Class<? extends ObservableInjectingParser> parserSuperClass ) throws MetaModelLookupException { init(writer2, metaLookup, syntaxbean); errorBucket.clear(); if (validationRules != null) { validationRules.validateSyntax(syntax, metaLookup, errorBucket); } // TODO: Do not generate on validation errors? writer.setGrammarName(syntax.getName()); // this should really be a UUID so that we can identify if the syntax was newly // generated. THis change in UUID is used to trigger the migration algorithms. // until TCS elements will all be identified by their UUIDs we will use the // special UUID attribute to store a randomly generated UUID. See documentation // of ConcreteSyntax:getUuid() writer.setSyntaxUUID(ObservationDirectivesHelper.getId(syntax)); String lexerString = syntax.getLexer(); if (lexerString == null) { lexerString = DEFAULT_LEXER; } else { lexerString = StringUtil.unescapeString(lexerString); // To stay compatible with TCS for a while, transform TCS special tokens // as in TCS2ANTLR.ATL line 558 ff lexerString = lexerString.replaceAll("%protected", "fragment"); lexerString = lexerString.replaceAll("%options", "//"); lexerString = lexerString.replaceAll("%v3", ""); lexerString = lexerString.replaceAll("%v2.*", ""); try { LexerStringMemberExtractor extractor = new LexerStringMemberExtractor(lexerString); lexerString = extractor.getCutLexerString(); List<String> memberParts = extractor.getContentParts(); if (memberParts != null && memberParts.size() > 0) { writer.addLexerMembers(memberParts); } } catch (InvalidLexerStringException e) { errorBucket.addError(e.getMessage(), syntax); } } writer.setFixedString(lexerString); if (parserSuperClass != null) { writer.setParserSuperClass(parserSuperClass); } if (syntax.getK() == null) { // if lookahead is 0, we need backtracking for ambiguous situations writer.setGrammarOptions("backtrack = true;"); } else if (syntax.getK() == 0) { // leave empty which means that synpreds have to be added manually writer.setGrammarOptions(null); } else { writer.setGrammarOptions("k = " + syntax.getK() + ";"); } Collection<Template> templates = syntax.getTemplates(); for (Template temp : templates) { try { this.addTemplateProductionRuleToGrammar(temp); } catch (SyntaxElementException e) { errorBucket.addException(e); } } // Operator Lists are dealt with from operatored Class templates, or // else they aren't usable anyways // List<OperatorList> operators = syntax.getOperatorLists(); // for (Iterator<OperatorList> iterator = operators.iterator(); // iterator.hasNext();) { // OperatorList operatorList = (OperatorList) iterator.next(); // mapper.addElementMappingRuleToGrammar( operatorList); // } Collection<Token> tokens = syntax.getTokens(); for (Token token : tokens) { this.addTokenProductionRuleToGrammar(token); } Collection<Symbol> symbols = syntax.getSymbols(); for (Symbol symbol : symbols) { this.addSymbolProductionRuleToGrammar(symbol); } // To beautify grammar, also add token rules for all keywords that aren't symbols GenerationReport report = new GenerationReport(locationMap, errorBucket, syntax.getName(), syntax); return report; } /** * Adds the token production rule to grammar. * * @param token * the token */ private void addTokenProductionRuleToGrammar(Token token) { tokenHandler.addToken(token); } /** * Adds the symbol production rule to grammar. * * @param symbol * the symbol */ private void addSymbolProductionRuleToGrammar(Symbol symbol) { writer.addRule(new SymbolProductionRule(symbol.getName().toUpperCase(), symbol.getValue())); } /** * Chooses right helper for the given template. This method can be * recursively be called by helpers. * * @param template * the template * @throws * @throws MetaModelLookupException * * @throws SyntaxParsingException * the syntax parsing exception * @throws MetaModelLookupException * the meta model lookup exception * @throws SyntaxElementException */ private void addTemplateProductionRuleToGrammar(Template template) throws MetaModelLookupException, SyntaxElementException { if (template instanceof ClassTemplate) { ClassTemplate primTemp = (ClassTemplate) template; classTempHandler.addTemplate(primTemp, ruleBodyFactory); } else if (template instanceof PrimitiveTemplate) { PrimitiveTemplate primTemp = (PrimitiveTemplate) template; primTempHandler.addTemplate(primTemp); } else if (template instanceof OperatorTemplate) { OperatorTemplate primTemp = (OperatorTemplate) template; operatorTemplateHandler.addTemplate(primTemp, ruleBodyFactory); } else if (template instanceof EnumerationTemplate) { EnumerationTemplate primTemp = (EnumerationTemplate) template; enumTempHandler.addTemplate(primTemp, ruleBodyFactory); } else if (template instanceof FunctionTemplate) { FunctionTemplate funcTemp = (FunctionTemplate) template; funcTempHandler.addTemplate(funcTemp, ruleBodyFactory); } else { // should never happen, unless TCS metamodel changed throw new RuntimeException(template.getClass() + " unknown implementation of Template"); } } protected static final String DEFAULT_LEXER = "NL\r\n" + " : ( \'\\r\' \'\\n\'\r\n" + " | \'\\n\' \'\\r\' //Improbable\r\n" + " | \'\\r\'\r\n" + " | \'\\n\'\r\n" + " )\r\n" + " {newline();$channel=HIDDEN;}\r\n" + " ;\r\n" + "\r\n" + "WS\r\n" + " : ( \' \'\r\n" + " | \'\\t\'\r\n" + " )\r\n" + " {$channel=HIDDEN;}" + " ;\r\n" + "\r\n" + "fragment\r\n" + "DIGIT\r\n" + " : \'0\'..\'9\'\r\n" + " ;\r\n" + "\r\n" + "fragment\r\n" + "ALPHA\r\n" + " : \'a\'..\'z\'\r\n" + " | \'A\'..\'Z\'\r\n" + " | \'_\'\r\n" + " //For Unicode compatibility (from 0000 to 00ff)\r\n" + " | \'\\u00C0\' .. \'\\u00D6\'\r\n" + " | \'\\u00D8\' .. \'\\u00F6\'\r\n" + " | \'\\u00F8\' .. \'\\u00FF\'\r\n" + " ;\r\n" + "\r\n" + "fragment\r\n" + "SNAME\r\n" + " : (ALPHA) (ALPHA | DIGIT)*\r\n" + ";\r\n" + "\r\n" + "NAME\r\n" + " : (\r\n" + " SNAME\r\n" + " | \'\"\'\r\n" + " ( ESC\r\n" + " | \'\\n\' {newline();}\r\n" + " | ~(\'\\\\\'|\'\\\"\'|\'\\n\')\r\n" + " )*\r\n" + " \'\"\'\r\n" + " \r\n" + " )\r\n" + " ;\r\n" + "\r\n" + "INT\r\n" + " : (DIGIT)+\r\n" + " ;\r\n" + "\r\n" + " FLOAT : DIGIT+ ((\'.\' DIGIT)=>\'.\' DIGIT+)? ; // cannot accept DIGIT \'.\' because it would conflict with Navigation\r\n" + "\r\n" + "fragment\r\n" + "ESC\r\n" + " : \'\\\\\'\r\n" + " ( \'n\' \r\n" + " | \'r\' \r\n" + " | \'t\' \r\n" + " | \'b\' \r\n" + " | \'f\' \r\n" + " | \'\"\' \r\n" + " | \'\\\'\' \r\n" + " | \'\\\\\' \r\n" + " | (\r\n" + " (\'0\'..\'3\')\r\n" + " (\r\n" + " : (\'0\'..\'7\')\r\n" + " (\r\n" + " : \'0\'..\'7\'\r\n" + " )?\r\n" + " )?\r\n" + " | (\'4\'..\'7\')\r\n" + " (\r\n" + " : (\'0\'..\'7\')\r\n" + " )?\r\n" + " )\r\n" + " {\r\n" + " }\r\n" + " )\r\n" + " ;\r\n" + "\r\n" + "STRING\r\n" + " : \'\\\'\'\r\n" + " ( ESC\r\n" + " | \'\\n\' {newline();}\r\n" + " | ~(\'\\\\\'|\'\\\'\'|\'\\n\')\r\n" + " )*\r\n" + " \'\\\'\'\r\n" + " \r\n" + " ;"; }