/*******************************************************************************
 * Copyright (c) 2008 Scott Stanchfield.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     Based on the ANTLR parser generator by Terence Parr, http://antlr.org
 *     Ric Klaren <klaren@cs.utwente.nl>
 *     Scott Stanchfield - Modifications for XML Parsing
 *******************************************************************************/
package com.javadude.antxr;

import java.io.IOException;
import java.util.List;

/** Generate P.html, a cross-linked representation of P with or without actions */
public class HTMLCodeGenerator extends CodeGenerator {
    /** non-zero if inside syntactic predicate generation */
    protected int syntacticPredLevel = 0;

    /** true during lexer generation, false during parser generation */
    protected boolean doingLexRules = false;

    protected boolean firstElementInAlt;
    protected AlternativeElement prevAltElem = null;  // what was generated last?

    /** Create an HTML code-generator using the given Grammar.
     * The caller must still call setTool, setBehavior, and setAnalyzer
     * before generating code.
     */
    public HTMLCodeGenerator() {
        super();
        charFormatter = new JavaCharFormatter();
    }

    /** Encode a string for printing in an HTML document,
     * e.g. encode '<', '>' and similar characters.
     * @param s the string to encode
     */
    static String HTMLEncode(String s) {
        StringBuffer buf = new StringBuffer();

        for (int i = 0, len = s.length(); i < len; i++) {
            char c = s.charAt(i);
            if (c == '&') {
                buf.append("&amp;");
            }
            else if (c == '\"') {
                buf.append("&quot;");
            }
            else if (c == '\'') {
                buf.append("&#039;");
            }
            else if (c == '<') {
                buf.append("&lt;");
            }
            else if (c == '>') {
                buf.append("&gt;");
            }
            else {
                buf.append(c);
            }
        }
        return buf.toString();
    }

    @Override
    public void gen() {
        // Do the code generation
        try {
            // Loop over all grammars
            for (Grammar g : behavior.grammars.values()) {
                // Connect all the components to each other
                /*
                g.setGrammarAnalyzer(analyzer);
                analyzer.setGrammar(g);
                */
                g.setCodeGenerator(this);

                // To get right overloading behavior across heterogeneous grammars
                g.generate();
                if (antxrTool.hasError()) {
                    antxrTool.fatalError("Exiting due to errors.");
                }
            }
        }
        catch (IOException e) {
            antxrTool.reportException(e, null);
        }
    }

    /** Generate code for the given grammar element.
     * @param action The {...} action to generate
     */
    @Override
    public void gen(ActionElement action) {
        // no-op
    }

    /** Generate code for the given grammar element.
     * @param blk The "x|y|z|..." block to generate
     */
    @Override
    public void gen(AlternativeBlock blk) {
        genGenericBlock(blk, "");
    }

    /** Generate code for the given grammar element.
     * @param end The block-end element to generate.  Block-end
     *            elements are synthesized by the grammar parser to represent
     *            the end of a block.
     */
    @Override
    public void gen(BlockEndElement end) {
        // no-op
    }

    /** Generate code for the given grammar element.
     * @param atom The character literal reference to generate
     */
    @Override
    public void gen(CharLiteralElement atom) {
        if (atom.not) {
            _print("~");
        }
        _print(HTMLCodeGenerator.HTMLEncode(atom.atomText) + " ");
    }

    /** Generate code for the given grammar element.
     * @param r The character-range reference to generate
     */
    @Override
    public void gen(CharRangeElement r) {
        print(r.beginText + ".." + r.endText + " ");
    }
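
    // Illustrative sketch (not part of the generator): what HTMLEncode does to a
    // typical grammar comment before it is written into the generated page.
    // The input string below is hypothetical.
    //
    //   HTMLEncode("a < b && \"quoted\"")  yields  "a &lt; b &amp;&amp; &quot;quoted&quot;"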
    /** Generate the lexer HTML file */
    @Override
    public void gen(LexerGrammar g) throws IOException {
        setGrammar(g);
        antxrTool.reportProgress("Generating " + grammar.getClassName() + ".html");
        currentOutput = antxrTool.openOutputFile(grammar.getClassName() + ".html");
        //SAS: changed for proper text file io

        tabs = 0;
        doingLexRules = true;

        // Generate header common to all output files
        genHeader();

        // Output the user-defined lexer preamble
        // RK: guess not..
        // println(grammar.preambleAction.getText());

        // Generate lexer class definition
        println("");

        // print javadoc comment if any
        if (grammar.comment != null) {
            _println(HTMLCodeGenerator.HTMLEncode(grammar.comment));
        }

        println("Definition of lexer " + grammar.getClassName() +
                ", which is a subclass of " + grammar.getSuperClass() + ".");

        // Generate user-defined lexer class members
        // printAction(grammar.classMemberAction.getText());

        /*
        // Generate string literals
        println("");
        println("*** String literals used in the parser");
        println("The following string literals were used in the parser.");
        println("An actual code generator would arrange to place these literals");
        println("into a table in the generated lexer, so that actions in the");
        println("generated lexer could match token text against the literals.");
        println("String literals used in the lexer are not listed here, as they");
        println("are incorporated into the mainstream lexer processing.");
        tabs++;
        // Enumerate all of the symbols and look for string literal symbols
        Enumeration ids = grammar.getSymbols();
        while ( ids.hasMoreElements() ) {
            GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
            // Only processing string literals -- reject other symbol entries
            if ( sym instanceof StringLiteralSymbol ) {
                StringLiteralSymbol s = (StringLiteralSymbol)sym;
                println(s.getId() + " = " + s.getTokenType());
            }
        }
        tabs--;
        println("*** End of string literals used by the parser");
        */

        // Generate nextToken() rule.
        // nextToken() is a synthetic lexer rule that is the implicit OR of all
        // user-defined lexer rules.
        genNextToken();

        // Generate code for each rule in the lexer
        for (RuleSymbol rs : grammar.rules) {
            if (!rs.id.equals("mnextToken")) {
                genRule(rs);
            }
        }

        // Close the lexer output file
        currentOutput.close();
        currentOutput = null;
        doingLexRules = false;
    }

    /** Generate code for the given grammar element.
     * @param blk The (...)+ block to generate
     */
    @Override
    public void gen(OneOrMoreBlock blk) {
        genGenericBlock(blk, "+");
    }

    /** Generate the parser HTML file */
    @Override
    public void gen(ParserGrammar g) throws IOException {
        setGrammar(g);
        // Open the output stream for the parser and set the currentOutput
        antxrTool.reportProgress("Generating " + grammar.getClassName() + ".html");
        currentOutput = antxrTool.openOutputFile(grammar.getClassName() + ".html");

        tabs = 0;

        // Generate the header common to all output files.
        genHeader();

        // Generate parser class definition
        println("");

        // print javadoc comment if any
        if (grammar.comment != null) {
            _println(HTMLCodeGenerator.HTMLEncode(grammar.comment));
        }

        println("Definition of parser " + grammar.getClassName() +
                ", which is a subclass of " + grammar.getSuperClass() + ".");

        // Enumerate the parser rules
        for (GrammarSymbol sym : grammar.rules) {
            println("");
            // Only process parser rules
            if (sym instanceof RuleSymbol) {
                genRule((RuleSymbol)sym);
            }
        }
        tabs--;
        println("");

        genTail();

        // Close the parser output stream
        currentOutput.close();
        currentOutput = null;
    }
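
    // Hedged usage sketch (hypothetical wiring, mirroring the constructor javadoc):
    // a caller creates the generator, supplies its collaborators, then calls gen(),
    // which writes one <ClassName>.html file per grammar in the behavior.
    // The variable names antxrTool, behavior and analyzer are assumed to come from
    // the surrounding Tool setup.
    //
    //   HTMLCodeGenerator html = new HTMLCodeGenerator();
    //   html.setTool(antxrTool);
    //   html.setBehavior(behavior);
    //   html.setAnalyzer(analyzer);
    //   html.gen();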
    /** Generate code for the given grammar element.
     * @param rr The rule-reference to generate
     */
    @Override
    public void gen(RuleRefElement rr) {
        grammar.getSymbol(rr.targetRule);

        // Generate the actual rule description
        _print("<a href=\"" + grammar.getClassName() + ".html#" + rr.targetRule + "\">");
        _print(rr.targetRule);
        _print("</a>");

        // RK: Leave out args..
        // if (rr.args != null) {
        //     _print("["+rr.args+"]");
        // }

        _print(" ");
    }

    /** Generate code for the given grammar element.
     * @param atom The string-literal reference to generate
     */
    @Override
    public void gen(StringLiteralElement atom) {
        if (atom.not) {
            _print("~");
        }
        _print(HTMLCodeGenerator.HTMLEncode(atom.atomText));
        _print(" ");
    }

    /** Generate code for the given grammar element.
     * @param r The token-range reference to generate
     */
    @Override
    public void gen(TokenRangeElement r) {
        print(r.beginText + ".." + r.endText + " ");
    }

    /** Generate code for the given grammar element.
     * @param atom The token-reference to generate
     */
    @Override
    public void gen(TokenRefElement atom) {
        if (atom.not) {
            _print("~");
        }
        _print(atom.atomText);
        _print(" ");
    }

    @Override
    public void gen(TreeElement t) {
        print(t + " ");
    }

    /** Generate the tree-walker HTML file */
    @Override
    public void gen(TreeWalkerGrammar g) throws IOException {
        setGrammar(g);
        // Open the output stream for the tree-walker and set the currentOutput
        antxrTool.reportProgress("Generating " + grammar.getClassName() + ".html");
        currentOutput = antxrTool.openOutputFile(grammar.getClassName() + ".html");
        //SAS: changed for proper text file io

        tabs = 0;

        // Generate the header common to all output files.
        genHeader();

        // Output the user-defined tree-walker preamble
        println("");
        // println("*** Tree-walker Preamble Action.");
        // println("This action will appear before the declaration of your tree-walker class:");
        // tabs++;
        // println(grammar.preambleAction.getText());
        // tabs--;
        // println("*** End of tree-walker Preamble Action");

        // Generate tree-walker class definition
        println("");

        // print javadoc comment if any
        if (grammar.comment != null) {
            _println(HTMLCodeGenerator.HTMLEncode(grammar.comment));
        }

        println("Definition of tree parser " + grammar.getClassName() +
                ", which is a subclass of " + grammar.getSuperClass() + ".");

        // Generate user-defined tree-walker class members
        // println("");
        // println("*** User-defined tree-walker class members:");
        // println("These are the member declarations that you defined for your class:");
        // tabs++;
        // printAction(grammar.classMemberAction.getText());
        // tabs--;
        // println("*** End of user-defined tree-walker class members");

        // Generate code for each rule in the grammar
        println("");
        // println("*** tree-walker rules:");
        tabs++;

        // Enumerate the tree-walker rules
        for (GrammarSymbol sym : grammar.rules) {
            println("");
            // Only process tree-walker rules
            if (sym instanceof RuleSymbol) {
                genRule((RuleSymbol)sym);
            }
        }
        tabs--;
        println("");
        // println("*** End of tree-walker rules");

        // println("");
        // println("*** End of tree-walker");

        // Close the tree-walker output stream
        currentOutput.close();
        currentOutput = null;
    }

    /** Generate a wildcard element */
    @Override
    public void gen(WildcardElement wc) {
        /*
        if ( wc.getLabel()!=null ) {
            _print(wc.getLabel()+"=");
        }
        */
        _print(". ");
    }
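
    // Illustrative output sketch: gen(RuleRefElement) above emits a cross-link to the
    // referenced rule, and genRule() (further below) emits the matching anchor, so a
    // reference to a hypothetical rule "expr" in a parser "MyParser" renders roughly as:
    //
    //   <a href="MyParser.html#expr">expr</a>      <!-- at the reference site -->
    //   <a name="expr">expr</a>                    <!-- at the rule definition -->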
    /** Generate code for the given grammar element.
     * @param blk The (...)* block to generate
     */
    @Override
    public void gen(ZeroOrMoreBlock blk) {
        genGenericBlock(blk, "*");
    }

    protected void genAlt(Alternative alt) {
        if (alt.getTreeSpecifier() != null) {
            _print(alt.getTreeSpecifier().getText());
        }
        prevAltElem = null;
        for (AlternativeElement elem = alt.head;
             !(elem instanceof BlockEndElement);
             elem = elem.next) {
            elem.generate();
            firstElementInAlt = false;
            prevAltElem = elem;
        }
    }

    /** Generate the header for a block, which may be a RuleBlock or a
     * plain AlternativeBlock.  This generates any variable declarations,
     * init-actions, and syntactic-predicate-testing variables.
     * @param blk The block for which the preamble is to be generated.
     */
    // protected void genBlockPreamble(AlternativeBlock blk) {
    //     // RK: don't dump out init actions
    //     // dump out init action
    //     if ( blk.initAction!=null ) {
    //         printAction("{" + blk.initAction + "}");
    //     }
    // }

    /** Generate common code for a block of alternatives; return a postscript
     * that needs to be generated at the end of the block.  Other routines
     * may append else-clauses and such for error checking before the postfix
     * is generated.
     */
    public void genCommonBlock(AlternativeBlock blk) {
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);

            // dump alt operator |
            if (i > 0 && blk.alternatives.size() > 1) {
                _println("");
                print("|\t");
            }

            // Dump the alternative, starting with predicates
            boolean save = firstElementInAlt;
            firstElementInAlt = true;
            tabs++;  // in case we do a newline in alt, increase the tab indent

            // RK: don't dump semantic/syntactic predicates
            // only obscures grammar.
            //
            // Dump semantic predicates
            //
            // if (alt.semPred != null) {
            //     println("{" + alt.semPred + "}?");
            // }
            // Dump syntactic predicate
            // if (alt.synPred != null) {
            //     genSynPred(alt.synPred);
            // }

            genAlt(alt);
            tabs--;
            firstElementInAlt = save;
        }
    }
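
    // Rough rendering sketch: genCommonBlock() prints the first alternative inline and
    // starts each later alternative on a new line prefixed with "|", while genGenericBlock()
    // supplies the surrounding parentheses, so a hypothetical block (a | b | c) comes out
    // approximately as:
    //
    //   ( a
    //   |   b
    //   |   c
    //   )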
    /** Generate a textual representation of the follow set
     * for a block.
     * @param blk The rule block of interest
     */
    public void genFollowSetForRuleBlock(RuleBlock blk) {
        Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1, blk.endNode);
        printSet(grammar.maxk, 1, follow);
    }

    protected void genGenericBlock(AlternativeBlock blk, String blkOp) {
        if (blk.alternatives.size() > 1) {
            // make sure we start on a new line
            if (!firstElementInAlt) {
                // only do newline if the last element wasn't a multi-line block
                if (prevAltElem == null ||
                    !(prevAltElem instanceof AlternativeBlock) ||
                    ((AlternativeBlock)prevAltElem).alternatives.size() == 1) {
                    _println("");
                    print("(\t");
                }
                else {
                    _print("(\t");
                }
                // _println("");
                // print("(\t");
            }
            else {
                _print("(\t");
            }
        }
        else {
            _print("( ");
        }

        // RK: don't dump init actions
        // genBlockPreamble(blk);
        genCommonBlock(blk);

        if (blk.alternatives.size() > 1) {
            _println("");
            print(")" + blkOp + " ");
            // if not last element of alt, need newline & to indent
            if (!(blk.next instanceof BlockEndElement)) {
                _println("");
                print("");
            }
        }
        else {
            _print(")" + blkOp + " ");
        }
    }

    /** Generate a header that is common to all HTML files */
    protected void genHeader() {
        println("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">");
        println("<HTML>");
        println("<HEAD>");
        println("<TITLE>Grammar " + antxrTool.grammarFile + "</TITLE>");
        println("</HEAD>");
        println("<BODY>");
        println("<table summary=\"\" border=\"1\" cellpadding=\"5\">");
        println("<tr>");
        println("<td>");
        println("<font size=\"+2\">Grammar " + grammar.getClassName() + "</font><br>");
        println("<a href=\"http://www.com.javadude.antxr.org\">ANTXR</a>-generated HTML file from " + antxrTool.grammarFile);
        println("<p>");
        println("Terence Parr, <a href=\"http://www.magelang.com\">MageLang Institute</a>");
        println("<br>ANTXR Version " + Tool.version + "; 1989-2005");
        println("</td>");
        println("</tr>");
        println("</table>");
        println("<PRE>");
        // RK: see no reason for printing include files and stuff...
        // tabs++;
        // printAction(behavior.getHeaderAction(""));
        // tabs--;
    }

    /** Generate the lookahead set for an alternate. */
    protected void genLookaheadSetForAlt(Alternative alt) {
        if (doingLexRules && alt.cache[1].containsEpsilon()) {
            println("MATCHES ALL");
            return;
        }
        int depth = alt.lookaheadDepth;
        if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
            // if the decision is nondeterministic, do the best we can: LL(k)
            // any predicates that are around will be generated later.
            depth = grammar.maxk;
        }
        for (int i = 1; i <= depth; i++) {
            Lookahead lookahead = alt.cache[i];
            printSet(depth, i, lookahead);
        }
    }

    /** Generate a textual representation of the lookahead set
     * for a block.
     * @param blk The block of interest
     */
    public void genLookaheadSetForBlock(AlternativeBlock blk) {
        // Find the maximal lookahead depth over all alternatives
        int depth = 0;
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            if (alt.lookaheadDepth == GrammarAnalyzer.NONDETERMINISTIC) {
                depth = grammar.maxk;
                break;
            }
            else if (depth < alt.lookaheadDepth) {
                depth = alt.lookaheadDepth;
            }
        }
        for (int i = 1; i <= depth; i++) {
            Lookahead lookahead = grammar.theLLkAnalyzer.look(i, blk);
            printSet(depth, i, lookahead);
        }
    }
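
    // Illustrative sketch of the set formatting produced by printSet() (defined near the
    // end of this class) when the lookahead depth is greater than one; the token names
    // below are made up:
    //
    //   k==1: {ID, LPAREN }
    //   k==2: {INT, FLOAT }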
    /** Generate the nextToken rule.
     * nextToken is a synthetic lexer rule that is the implicit OR of all
     * user-defined lexer rules.
     */
    public void genNextToken() {
        println("");
        println("/** Lexer nextToken rule:");
        println(" *  The lexer nextToken rule is synthesized from all of the user-defined");
        println(" *  lexer rules.  It logically consists of one big alternative block with");
        println(" *  each user-defined rule being an alternative.");
        println(" */");

        // Create the synthesized rule block for nextToken consisting
        // of an alternate block containing all the user-defined lexer rules.
        RuleBlock blk = MakeGrammar.createNextTokenRule(grammar, grammar.rules, "nextToken");

        // Define the nextToken rule symbol
        RuleSymbol nextTokenRs = new RuleSymbol("mnextToken");
        nextTokenRs.setDefined();
        nextTokenRs.setBlock(blk);
        nextTokenRs.access = "private";
        grammar.define(nextTokenRs);

        /*
        // Analyze the synthesized block
        if (!grammar.theLLkAnalyzer.deterministic(blk)) {
            println("The grammar analyzer has determined that the synthesized");
            println("nextToken rule is non-deterministic (i.e., it has ambiguities)");
            println("This means that there is some overlap of the character");
            println("lookahead for two or more of your lexer rules.");
        }
        */

        genCommonBlock(blk);
    }

    /** Generate code for a named rule block
     * @param s The RuleSymbol describing the rule to generate
     */
    public void genRule(RuleSymbol s) {
        if (s == null || !s.isDefined()) {
            return;  // undefined rule
        }
        println("");
        if (s.comment != null) {
            _println(HTMLCodeGenerator.HTMLEncode(s.comment));
        }
        if (s.access.length() != 0) {
            if (!s.access.equals("public")) {
                _print(s.access + " ");
            }
        }
        _print("<a name=\"" + s.getId() + "\">");
        _print(s.getId());
        _print("</a>");

        // Get rule return type and arguments
        RuleBlock rblk = s.getBlock();

        // RK: for HTML output not of much value...
        // Gen method return value(s)
        // if (rblk.returnAction != null) {
        //     _print("["+rblk.returnAction+"]");
        // }
        // Gen arguments
        // if (rblk.argAction != null)
        // {
        //     _print(" returns [" + rblk.argAction+"]");
        // }
        _println("");
        tabs++;
        print(":\t");

        // Dump any init-action
        // genBlockPreamble(rblk);

        // Dump the alternates of the rule
        genCommonBlock(rblk);

        _println("");
        println(";");
        tabs--;
    }

    /** Generate the syntactic predicate.  This basically generates
     * the alternative block, but tracks if we are inside a synPred.
     * @param blk The syntactic predicate block
     */
    protected void genSynPred(SynPredBlock blk) {
        syntacticPredLevel++;
        genGenericBlock(blk, " =>");
        syntacticPredLevel--;
    }

    public void genTail() {
        println("</PRE>");
        println("</BODY>");
        println("</HTML>");
    }
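
    // Illustrative sketch of the token-types listing written by genTokenTypes() below;
    // the identifiers and numeric values are hypothetical, and literals are shown
    // double-quoted as the generated text explains:
    //
    //   ID = 4
    //   "begin" = 5
    //   "end" = 6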
    /** Generate the token types TXT file */
    protected void genTokenTypes(TokenManager tm) throws IOException {
        // Open the token output TXT file and set the currentOutput stream
        antxrTool.reportProgress("Generating " + tm.getName() + CodeGenerator.TokenTypesFileSuffix + CodeGenerator.TokenTypesFileExt);
        currentOutput = antxrTool.openOutputFile(tm.getName() + CodeGenerator.TokenTypesFileSuffix + CodeGenerator.TokenTypesFileExt);
        //SAS: changed for proper text file io

        tabs = 0;

        // Generate the header common to all diagnostic files
        genHeader();

        // Generate a string for each token.  This creates a static
        // array of Strings indexed by token type.
        println("");
        println("*** Tokens used by the parser");
        println("This is a list of the token numeric values and the corresponding");
        println("token identifiers.  Some tokens are literals, and because of that");
        println("they have no identifiers.  Literals are double-quoted.");
        tabs++;

        // Enumerate all the valid token types
        List<String> v = tm.getVocabulary();
        for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
            String s = v.get(i);
            if (s != null) {
                println(s + " = " + i);
            }
        }

        // Close the interface
        tabs--;
        println("*** End of tokens used by the parser");

        // Close the tokens output file
        currentOutput.close();
        currentOutput = null;
    }

    /** Get a string for an expression to generate creation of an AST subtree.
     * @param v A List of Strings, where each element is an expression in the
     *          target language yielding an AST node.
     */
    @Override
    public String getASTCreateString(List<String> v) {
        return null;
    }

    /** Get a string for an expression to generate creation of an AST node.
     * @param atom The grammar atom for which the node is created
     * @param str The arguments to the AST constructor
     */
    @Override
    public String getASTCreateString(GrammarAtom atom, String str) {
        return null;
    }

    /** Map an identifier to its corresponding tree-node variable.
     * This is context-sensitive, depending on the rule and alternative
     * being generated.
     * @param id The identifier name to map
     * @param tInfo The action translation info (unused here)
     */
    @Override
    public String mapTreeId(String id, ActionTransInfo tInfo) {
        return id;
    }

    /** Unused. */
    @Override
    protected String processActionForSpecialSymbols(String actionStr, int line,
                                                    RuleBlock currentRule,
                                                    ActionTransInfo tInfo) {
        return actionStr;
    }

    /** Format a lookahead or follow set.
     * @param depth The depth of the entire lookahead/follow
     * @param k The lookahead level to print
     * @param lookahead The lookahead/follow set to print
     */
    public void printSet(int depth, int k, Lookahead lookahead) {
        int numCols = 5;
        int[] elems = lookahead.fset.toArray();
        if (depth != 1) {
            print("k==" + k + ": {");
        }
        else {
            print("{ ");
        }
        if (elems.length > numCols) {
            _println("");
            tabs++;
            print("");
        }
        int column = 0;
        for (int i = 0; i < elems.length; i++) {
            column++;
            if (column > numCols) {
                _println("");
                print("");
                column = 0;
            }
            if (doingLexRules) {
                _print(charFormatter.literalChar(elems[i]));
            }
            else {
                _print(grammar.tokenManager.getVocabulary().get(elems[i]));
            }
            if (i != elems.length - 1) {
                _print(", ");
            }
        }
        if (elems.length > numCols) {
            _println("");
            tabs--;
            print("");
        }
        _println(" }");
    }
}