package x10.parser;

import polyglot.util.CollectionUtil;
import x10.util.CollectionFactory;
import java.io.File;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.StringTokenizer;
import java.util.HashSet;
import java.util.Collections;
import java.util.Random;
import java.util.Collection;
import java.util.Set;
import java.util.Map;

/*
Not used: all the rules at the beginning including
MethodPrimaryPrefix ::= Primary . ErrorId $ErrorId
MethodSuperPrefix ::= super . ErrorId $ErrorId
PlaceType ::= PlaceExpression
PlaceExpressionSingleListopt ::= $Empty | PlaceExpressionSingleList
delete: -> duplications at the beginning
*/
/**
 * Reads an LPG-style grammar file (e.g. x10.g), parses its %Rules and %Types
 * sections, prunes symbols/types unreachable from {@code CompilationUnit},
 * and writes a transformed grammar file plus generated rule_* method stubs
 * to stdout. A second (currently disabled — see the {@code if (true)} switch
 * in the constructor) mode generates random sentences from the grammar.
 */
public class AutoGenSentences {
    // Maximum recursion depth when generating random sentences.
    private static int MAX_DEPTH = 10;
    private static String CompilationUnit = "CompilationUnit";
    private static String TypeDeclaration = "TypeDeclaration";

    /**
     * One production right-hand side. The list elements are the RHS symbols
     * (with any $alias stripped); {@code names} holds the per-symbol alias
     * (or the symbol itself when no alias was given); {@code code} holds the
     * embedded semantic-action Java lines, with $name macros already
     * substituted by positional numbers.
     */
    static class Rule extends ArrayList<String> {
        private static final long serialVersionUID = 1L; // ArrayList is Serializable
        private final String symbol; // the LHS non-terminal
        private final int line;      // grammar-file line this production started on
        private final ArrayList<String> names = new ArrayList<String>();
        private final ArrayList<String> code = new ArrayList<String>();
        Rule(String symbol, int line) {
            this.symbol = symbol;
            this.line = line;
        }
    }

    /**
     * Entry point. Expects exactly two arguments: the grammar file to read
     * and the output file to write.
     */
    public static void main(String[] args) {
        if (args.length!=2) {
            System.err.println("You need to run AutoGenSentences with two arguments: GRAMMAR_FILE OUTPUT_FILE\nFor example: java AutoGenSentences x10.g Output.x10\n");
            System.exit(-1);
        }
        //only-grammar-productions.txt  auto-gen-sentences.txt
        new AutoGenSentences(args);
    }

    /**
     * Does all the work: parses the grammar file named by {@code args[0]},
     * prunes unused symbols/types, then writes the transformed grammar to
     * {@code args[1]} and prints generated rule_* method bodies to stdout.
     */
    AutoGenSentences(String[] args) {
        ArrayList<String> grammarFile = readFile(new File(args[0]));
        ArrayList<String> newFile = new ArrayList<String>();
        String currProd = null;   // LHS of the production currently being parsed
        int lineNum = -1;
        try {
            boolean isTypes = false;                  // inside a %Types section?
            Map<String, ArrayList<Rule>> rules = null; // target map: grammar (in %Rules) or a scratch map (in %Types)
            boolean inJavaCode = false;               // inside a /.$BeginJava ... $EndJava./ action block?
            for (String line : grammarFile) {
                lineNum++;
                // %Types lines are dropped from the output; a fresh %Types
                // section is regenerated at the end.
                if (!isTypes) newFile.add(line);
                line = line.trim();
                if (line.equals("")) continue;
                if (line.equals("%Rules")) { rules = grammar; continue; }
                if (line.equals("%Types")) { isTypes = true; rules = CollectionFactory.newHashMap(); continue; }
                if (line.equals("%End")) {
                    if (isTypes) {
                        isTypes = false;
                        // assert all productions are of size 1
                        for (String type : rules.keySet())
                            for (ArrayList<String> prod : rules.get(type)) {
                                assert prod.size()==1;
                                String nonTerminal = prod.get(0);
                                assert !isLiteral(nonTerminal);
                                assert !types.containsKey(nonTerminal);
                                types.put(nonTerminal,type);
                            }
                    }
                    rules = null;
                }
                if (rules==null) continue;       // not inside a section we care about
                if (line.startsWith("--")) continue; // ignore comments
                // Ignore: /.$NullAction./
                if (line.equals("/.$NullAction./")) continue;
                if (line.startsWith("/.")) {
                    // Start of a semantic action: replace it in the output with a
                    // call to the generated rule_<symbol><prodNum>(...) method.
                    assert line.equals("/.$BeginJava") : line;
                    inJavaCode = true;
                    ArrayList<Rule> prods = rules.get(currProd);
                    final int prodNum = prods.size() - 1;
                    final Rule rule = prods.get(prodNum);
                    ArrayList<String> ruleArgs = new ArrayList<String>();
                    int k=0;
                    for (String name : rule.names) {
                        String id = rule.get(k++);
                        if (hasArg(id,name)) ruleArgs.add(name);
                    }
                    newFile.add("\t\t\tr.rule_"+rule.symbol+prodNum+"("+simpleJoin(ruleArgs,",")+");");
                    continue;
                }
                if (line.endsWith("./")) {
                    assert inJavaCode;
                    assert line.equals("./") || line.startsWith("$EndJava") : line;
                    inJavaCode = false;
                    continue;
                }
                if (inJavaCode) {
                    // Collect the action body into rule.code, substituting
                    // $name macros by their 1-based RHS position, and remove
                    // the raw line from the output file.
                    ArrayList<Rule> prods = rules.get(currProd);
                    final Rule rule = prods.get(prods.size() - 1);
                    int k=1;
                    for (String name : rule.names)
                        line = line.replace("$"+name,""+(k++));
                    line = line.replace("$sym_type","X10Parsersym");
                    if (!line.equals("$EndJava")) rule.code.add(line);
                    newFile.remove(newFile.size()-1);
                    continue;
                }
                // A production line: either "<LHS> ::= rhs..." or a "| rhs..." continuation.
                StringTokenizer tokenizer = new StringTokenizer(line);
                final String first = tokenizer.nextToken();
                if (!first.equals("|")) {
                    currProd = first;
                    currProd = unescape(currProd);
                    String next = tokenizer.nextToken();
                    assert next.equals("::=") || next.equals("::=?") : next;
                }
                assert currProd!=null;
                ArrayList<Rule> prods = rules.get(currProd); // AssignmentExpression is stated in 2 different rules
                if (prods==null) {
                    prods = new ArrayList<Rule>();
                    rules.put(currProd, prods);
                }
                //ImportDeclarations PackageDeclaration$misplacedPackageDeclaration ImportDeclarationsopt$misplacedImportDeclarations TypeDeclarationsopt
                Rule terms = new Rule(currProd, lineNum);
                while (tokenizer.hasMoreTokens()) {
                    String token = tokenizer.nextToken();
                    if (token.equals("--")) break; // comments
                    if (token.equals("|")) {
                        // alternative on the same line: close current RHS, start a new one
                        prods.add(terms);
                        terms = new Rule(currProd, lineNum);
                        continue;
                    }
                    token = unescape(token);
                    assert token.charAt(0)!='$';
                    int indexDollar = token.indexOf('$');
                    String name = token;
                    if (indexDollar!=-1) {
                        name = token.substring(indexDollar+1);
                        token = token.substring(0,indexDollar); // ImportDeclarationsopt$misplacedImportDeclarations
                    }
                    if (token.equals("%Empty")) continue;
                    terms.add(token);
                    terms.names.add(name);
                }
                // can be empty: assert terms.size()>=1 : currProd;
                prods.add(terms);
            }
        } catch (Throwable e) {
            // Best-effort: report the grammar line that broke parsing and
            // continue with whatever was collected so far.
            System.err.println("Error on line "+lineNum);
            e.printStackTrace();
        }
        // removing unused symbols and types
        findUsedSymbols(CompilationUnit);
        Set<String> unusedSymbols = CollectionFactory.newHashSet(grammar.keySet());
        unusedSymbols.removeAll(usedSymbols);
        if (unusedSymbols.size()>0) {
            System.out.println("Unused symbols are: "+unusedSymbols);
            for (String s : unusedSymbols) grammar.remove(s);
        }
        Set<String> unusedTypes = CollectionFactory.newHashSet(types.keySet());
        unusedTypes.removeAll(usedSymbols);
        if (unusedTypes.size()>0) {
            System.out.println("Unused types are: "+unusedTypes);
            for (String s : unusedTypes) types.remove(s);
        }
        final ArrayList<String> nonTerminals = getNonTerminals();
        System.out.println("Roots are: "+findRoots());
        System.out.println("Literals are: "+getLiterals());
        System.out.println("Non-terminals are: "+ nonTerminals);
        // consistency checks
        Set<String> nonTerminalsWithoutType = CollectionFactory.newHashSet(nonTerminals);
        nonTerminalsWithoutType.removeAll(types.keySet());
        assert nonTerminalsWithoutType.size()==0 : nonTerminalsWithoutType;
        assert grammar.keySet().containsAll(types.keySet());
        // printing output: regenerate a flat %Types section
        newFile.add("%Types");
        newFile.add("\tObject ::= "+simpleJoin(types.keySet()," | "));
        newFile.add("%End");
        File output = new File(args[1]);
        // Mode switch: 'true' = rewrite grammar + emit rule_* stubs;
        // flip to reach the sentence-generation code below.
        if (true) {
            if (false) printSingletons();
            if (false) printGrammar(CompilationUnit,CollectionFactory.<String>newHashSet());
            writeFile(output,newFile);
            // Emit a rule_<symbol><prodNum>(...) method for every production
            // that carried an embedded semantic action.
            for (ArrayList<Rule> prods : grammar.values()) {
                int prodNum = -1;
                for (Rule rule : prods) {
                    prodNum++;
                    if (rule.code.size()>0) {
                        int k = 0;
                        ArrayList<String> ruleArgs = new ArrayList<String>();
                        ArrayList<String> castArgs = new ArrayList<String>();
                        for (String name : rule.names) {
                            String id = rule.get(k++);
                            if (hasArg(id,name)) {
                                ruleArgs.add("Object _"+name);
                                String type = types.get(id);
                                if (type==null) type = "IToken"; // symbols without a declared type are tokens
                                castArgs.add(type+" "+name+" = ("+type+") _"+name+";");
                            }
                        }
                        System.out.println("\t// Production: "+rule.symbol+" ::= "+ join(rule," "));
                        System.out.println("\tvoid rule_"+rule.symbol+prodNum+"("+simpleJoin(ruleArgs,", ")+") {");
                        for (String s : castArgs) System.out.println("\t\t"+s);
                        for (String c : rule.code) System.out.println("\t\t"+c);
                        System.out.println("\t}");
                    }
                }
            }
            return;
        }
        //x10.g root is CompilationUnit, but we want to generate many TypeDeclaration
        printGrammar(TypeDeclaration,CollectionFactory.<String>newHashSet());
        final Set<String> res = gen(TypeDeclaration, MAX_DEPTH);
        assert EMPTY_STR.size()==1 : EMPTY_STR;
        writeFile(output,res);
    }

    /**
     * Strips the single quotes the grammar uses to escape special tokens.
     * E.g. {@code "'|'"} becomes {@code "|"}; unquoted tokens pass through.
     */
    private static String unescape(String token) {
        if (token.charAt(0)=='\'') {
            // we escaped some tokens, like '|' '%' '-->'
            assert token.charAt(token.length()-1)=='\'' : token;
            token = token.substring(1,token.length()-1);
        }
        return token;
    }

    // LHS symbol -> its alternative productions (filled from the %Rules section).
    final Map<String, ArrayList<Rule>> grammar = CollectionFactory.newHashMap();
    // non-terminal -> declared AST type (filled from the %Types section).
    final Map<String, String> types = CollectionFactory.newHashMap();
    // Singleton set {""}: result of generating an empty production.
    Set<String> EMPTY_STR = CollectionFactory.newHashSet(Collections.singleton(""));

    /**
     * Joins symbols with {@code sep}, re-quoting tokens that don't start with
     * a letter/digit; an empty collection renders as {@code "%Empty"}.
     */
    String join(Collection<String> arr, String sep) {
        if (arr.size()==0) return "%Empty";
        String res = "";
        for (String s : arr) {
            final char c = s.charAt(0);
            res = res + (res.equals("") ? "" : sep) + (Character.isLetterOrDigit(c) ? s : "'"+s+"'");
        }
        return res;
    }

    /** Plain separator join; an empty collection renders as the empty string. */
    String simpleJoin(Collection<String> arr, String sep) {
        if (arr.size()==0) return "";
        String res = "";
        for (String s : arr) {
            res = res + (res.equals("") ? "" : sep) + s;
        }
        return res;
    }

    // State for printSingletons/dfs: edges symbol -> its unit-production targets,
    // plus DFS post-order ids (-1 marks "on the current DFS stack").
    Map<String,Set<String>> graph = CollectionFactory.newHashMap();
    Map<String,Integer> visited = CollectionFactory.newHashMap();
    int currID = 0;

    /**
     * Builds the graph of unit productions (RHS of size 1) and verifies via
     * DFS post-order numbering that it is acyclic, printing each edge.
     */
    void printSingletons() {
        // I want to make sure the singletons don't have cycles
        for (String symbol : grammar.keySet()) {
            final Set<String> set = CollectionFactory.newHashSet();
            for (ArrayList<String> prods : grammar.get(symbol)) {
                if (prods.size()==1) {
                    final String other = prods.get(0);
                    set.add(other);
                }
            }
            if (set.size()>0) graph.put(symbol, set);
        }
        // do a DFS and assert we do not have a cycle
        for (String symbol : graph.keySet()) dfs(symbol);
        for (String symbol : graph.keySet()) {
            int id = visited.get(symbol);
            for (String child : graph.get(symbol)) {
                int id2 = visited.get(child);
                assert id2<id; // children finish before parents in an acyclic graph
                System.out.println(symbol+"("+id+") -> "+ child+"("+id2+")");
            }
        }
    }

    /**
     * DFS over {@code graph}, assigning increasing post-order ids in
     * {@code visited}; asserts no back edge (id -1) is revisited.
     */
    void dfs(String v) {
        final Integer i = visited.get(v);
        assert i==null || i.intValue()!=-1; // -1 means v is on the stack => cycle
        if (i!=null) return; // already visited
        visited.put(v,-1);
        final Set<String> children = graph.get(v);
        if (children==null) {
            //assert isLiteral(v); , e.g., DepNamedType
        } else {
            for (String child : children) {
                dfs(child);
            }
        }
        visited.put(v,currID++);
    }

    /**
     * Recursively prints the productions reachable from {@code symbol} in
     * grammar notation, visiting each symbol once via {@code alreadyPrinted}.
     */
    void printGrammar(String symbol, Set<String> alreadyPrinted) {
        if (alreadyPrinted.contains(symbol)) return;
        alreadyPrinted.add(symbol);
        ArrayList<Rule> prods = grammar.get(symbol);
        if (prods==null) {
            // literal
            genLiteral(symbol); // for testing
            return;
        }
        System.out.println(symbol+" ::= " + (prods.size()==0 ? "" : join(prods.get(0)," ")));
        for (int i=1; i<prods.size(); i++)
            System.out.println("\t| "+join(prods.get(i)," "));
        for (ArrayList<String> prod : prods)
            for (String s : prod)
                printGrammar(s,alreadyPrinted);
    }

    /**
     * Generates sample strings for one production RHS; returns null when the
     * depth budget is exhausted for some symbol. Rather than the full (too
     * large) cartesian product, it randomly samples combinations.
     */
    Set<String> genProd(ArrayList<String> prod, int depth) {
        final int prodNum = prod.size();
        if (prodNum==0) return EMPTY_STR;
        ArrayList<Set<String>> acc = new ArrayList<Set<String>>(prodNum);
        int size = 0;
        for (String s : prod) {
            Set<String> set = gen(s,depth-1);
            if (set==null) return null; // symbol could not bottom out within depth
            size += set.size();
            acc.add(set);
        }
        if (prodNum ==1) return acc.get(0);
        ArrayList<String[]> acc2 = new ArrayList<String[]>(prodNum);
        for (Set<String> s : acc) acc2.add(s.toArray(new String[s.size()]));
        // should be the cartesian prod of all sets, but it is too big, so we sum the sets
        size *= 2;
        Set<String> res = CollectionFactory.newHashSet(2*size);
        Random r = new Random();
        for (int i=0; i<size; i++) {
            StringBuilder s = new StringBuilder();
            for (String[] arr : acc2) {
                s.append(arr[r.nextInt(arr.length)]);
            }
            res.add(s.toString());
        }
        return res;
    }

    /**
     * Generates sample sentences for {@code rule}: a literal yields its
     * rendered token plus a trailing space; a non-terminal unions samples of
     * all its productions. Returns null when nothing fits within {@code depth}.
     */
    Set<String> gen(String rule, int depth) {
        Set<String> res = CollectionFactory.newHashSet();
        ArrayList<Rule> prods = grammar.get(rule);
        if (prods==null) {
            // literal
            res.add(genLiteral(rule)+" ");
        } else {
            if (depth<=0) return null;
            for (ArrayList<String> prod : prods) {
                Set<String> acc = genProd(prod,depth);
                if (acc!=null) {
                    assert acc.size()>0 : rule;
                    res.addAll(acc);
                }
            }
            if (res.size()==0) return null;
        }
        return res;
    }

    /** A symbol with no productions is a terminal/literal. */
    boolean isLiteral(String s) { return !grammar.containsKey(s); }

    /** Fair coin flip for picking between two sample literal renderings. */
    static boolean random() { return Math.random()<0.5; }

    /** A RHS symbol becomes a rule_* method argument iff it is not a literal. */
    static boolean hasArg(String id, String name) { return !isLiteral2(id); }

    static boolean isLiteral2(String s) {
        // because we need to know if something is a literal before we parse the entire file
        return genLiteral2(s)!=null;
    }

    /**
     * Renders a sample token for a literal symbol, or null for symbols that
     * look like non-terminals. Keywords (not starting with an uppercase
     * letter) render as themselves; the known uppercase pseudo-terminals get
     * a randomly chosen concrete spelling.
     */
    static String genLiteral2(String s) {
        char first = s.charAt(0);
        if (first>='A' && first<='Z') {
            // special literal
            // special literals (all start with uppercase):
            if (s.equals("UnsignedIntegerLiteral")) { return random() ? "0u" : "1u"; }
            else if (s.equals("UnsignedLongLiteral")) { return random() ? "0ul" : "1ul"; }
            else if (s.equals("IntegerLiteral")) { return random() ? "0" : "1"; }
            else if (s.equals("LongLiteral")) { return random() ? "0l" : "1l"; }
            else if (s.equals("FloatingPointLiteral")) { return random() ? "0.0f" : "1.1f"; }
            else if (s.equals("DoubleLiteral")) { return random() ? "0.0" : "1.1"; }
            else if (s.equals("StringLiteral")) { return random() ? "\"\"" : "\"a\""; }
            else if (s.equals("CharacterLiteral")) { return random() ? "' '" : "'a'"; }
            else if (s.equals("IDENTIFIER")) { return random() ? "x" : "y"; }
            else if (s.equals("ErrorId")) { return "ERR"; }
            else { return null; }
        }
        return s;
    }

    /** Like {@link #genLiteral2}, but asserts the symbol really is a literal. */
    static String genLiteral(String s) {
        String res = genLiteral2(s);
        assert res!=null;
        return res;
    }

    // Symbols reachable from the start symbol (filled by findUsedSymbols).
    Set<String> usedSymbols = CollectionFactory.newHashSet();

    /** Marks {@code v} and everything reachable from it as used. */
    void findUsedSymbols(String v) {
        if (usedSymbols.contains(v)) return;
        usedSymbols.add(v);
        final ArrayList<Rule> prods = grammar.get(v);
        if (prods==null) return; // literal: no outgoing productions
        for (ArrayList<String> prod : prods)
            for (String s : prod)
                findUsedSymbols(s);
    }

    /** Returns the symbols that never appear on any right-hand side. */
    Set<String> findRoots() {
        Set<String> res = CollectionFactory.newHashSet(grammar.keySet());
        for (ArrayList<Rule> products : grammar.values())
            for (ArrayList<String> prod : products)
                for (String s : prod)
                    res.remove(s);
        return res;
    }

    /** All grammar LHS symbols minus the literals, sorted alphabetically. */
    ArrayList<String> getNonTerminals() {
        ArrayList<String> res = new ArrayList<String>(grammar.keySet());
        res.removeAll(getLiterals());
        Collections.sort(res);
        return res;
    }

    /** All distinct literals used on any RHS, each sanity-checked, sorted. */
    ArrayList<String> getLiterals() {
        Set<String> res = CollectionFactory.newHashSet();
        for (ArrayList<Rule> products : grammar.values())
            for (ArrayList<String> prod : products)
                for (String s : prod)
                    if (isLiteral(s)) {
                        genLiteral(s); // to test it
                        res.add(s);
                    }
        ArrayList<String> sorted = new ArrayList<String>(res);
        Collections.sort(sorted);
        return sorted;
    }

    /**
     * Reads a text file into a list of lines (platform default charset, as
     * before). Uses try-with-resources so the reader is closed even when
     * reading throws — the original leaked it on the error path.
     *
     * @throws RuntimeException wrapping any IOException
     */
    public static ArrayList<String> readFile(File f) {
        try (BufferedReader in = new BufferedReader(new FileReader(f))) {
            ArrayList<String> res = new ArrayList<String>();
            String line;
            while ((line=in.readLine())!=null) {
                res.add(line);
            }
            return res;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    public static final String NL = System.getProperty("line.separator");

    /**
     * Writes each line followed by the platform separator. Uses
     * try-with-resources so the writer is closed (and buffers flushed or
     * discarded deterministically) even when a write throws — the original
     * leaked it on the error path.
     *
     * @throws RuntimeException wrapping any IOException
     */
    public static void writeFile(File f, Collection<String> lines) {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(f))) {
            for (String s : lines) {
                out.write(s);
                out.write(NL);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}