package project.phase2.ll1parsergenerator;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.text.ParseException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import project.phase2.ll1parsergenerator.LL1Parser.RuleSelection;
/**
* Generates an LL1 Parser from an array of rules.
*
*/
public class ParserGenerator {
/**
* Generates a parser based upon the given rules.
*
* @param rules
* the rules for the parser.
* @return a new parser.
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
public static LL1Parser generateParser(Rule[] rules) {
LL1Parser parse = new LL1Parser();
for (Rule rule : rules) {
parse.addRule(rule);
if (rule.isStart())
parse.setStartRule(rule);
}
Map[] firstMaps = getFirstMaps(rules);
Map<String, Set<String>> ruleFirstMap = firstMaps[0];
Map<Rule[], Set<String>> prodFirstMap = firstMaps[1];
Map<String, Set<String>> followMap = getFollowMap(ruleFirstMap, rules);
for (int i = 0; i < rules.length; ++i) {
if (rules[i].isTerminal())
continue;
String ruleName = rules[i].getName();
Rule[][] prodRules = rules[i].getRules();
for (int j = 0; j < prodRules.length; ++j) {
Set<String> tokens = new HashSet<String>(
prodFirstMap.get(prodRules[j]));
if (tokens.contains(LL1Parser.EPSILON)) {
tokens.remove(LL1Parser.EPSILON);
tokens.addAll(followMap.get(ruleName));
}
for (String token : tokens) {
parse.addRuleSelection(new RuleSelection(ruleName, token, j));
}
}
}
return parse;
}
@SuppressWarnings("rawtypes")
/**
* Get the maps of the first elements in each rule/production rule.
* @param rules the rules.
* @return the maps.
*/
public static Map[] getFirstMaps(Rule[] rules) {
Map<String, Set<String>> firstMap = new HashMap<String, Set<String>>();
Map<Rule[], Set<String>> prodFirstMap = new HashMap<Rule[], Set<String>>();
for (Rule rule : rules) {
if (rule.isTerminal())
continue;
firstMap.put(rule.getName(), new HashSet<String>());
for (Rule[] prod : rule.getRules()) {
prodFirstMap.put(prod, new HashSet<String>());
}
}
boolean changed = true;
while (changed) {
changed = false;
for (Rule rule : rules) {
if (rule.isTerminal())
continue;
Set<String> first = firstMap.get(rule.getName());
int startLen = first.size();
for (Rule[] prodRule : rule.getRules()) {
Set<String> prodFirst = prodFirstMap.get(prodRule);
int prodStartLen = prodFirst.size();
for (int i = 0; i <= prodRule.length; ++i) {
if (i == prodRule.length) {
prodFirst.add(LL1Parser.EPSILON);
}
if (prodRule[i].isTerminal()) {
prodFirst.add(prodRule[i].getName());
break;
} else {
Set<String> add = new HashSet<String>(
firstMap.get(prodRule[i].getName()));
add.remove(LL1Parser.EPSILON);
prodFirst.addAll(add);
if (!firstMap.get(prodRule[i].getName()).contains(
LL1Parser.EPSILON))
break;
}
}
first.addAll(prodFirst);
if (!changed && prodFirst.size() != prodStartLen)
changed = true;
}
if (!changed && first.size() != startLen)
changed = true;
}
}
return new Map[] { firstMap, prodFirstMap };
}
/**
* Gets the follow map for the given rules and first map.
*
* @param ruleFirstMap
* the first map.
* @param rules
* the rules.
* @return the follow map.
*/
public static Map<String, Set<String>> getFollowMap(
Map<String, Set<String>> ruleFirstMap, Rule[] rules) {
Map<String, Set<String>> followMap = new HashMap<String, Set<String>>();
for (String s : ruleFirstMap.keySet()) {
followMap.put(s, new HashSet<String>());
}
for (Rule r : rules) {
if (r.isStart()) {
followMap.get(r.getName()).add("$");
}
}
boolean changed = true;
while (changed) {
changed = false;
for (Rule rule : rules) {
if (rule.isTerminal())
continue;
for (Rule[] prod : rule.getRules()) {
for (int i = 0; i < prod.length; ++i) {
if (!prod[i].isTerminal()) {
Set<String> newFollow = new HashSet<String>();
Set<String> oldFollow = followMap.get(prod[i]
.getName());
int startLen = oldFollow.size();
for (int j = i + 1; j <= prod.length; ++j) {
if (j == prod.length) {
newFollow.addAll(followMap.get(rule
.getName()));
break;
}
if (prod[j].isTerminal()) {
if (prod[j].getName() == LL1Parser.EPSILON) {
continue;
} else {
newFollow.add(prod[j].getName());
break;
}
} else {
Set<String> add = new HashSet<String>(
ruleFirstMap.get(prod[j].getName()));
add.remove(LL1Parser.EPSILON);
newFollow.addAll(add);
if (!ruleFirstMap.get(prod[j].getName())
.contains(LL1Parser.EPSILON))
break;
}
}
oldFollow.addAll(newFollow);
if (!changed && oldFollow.size() != startLen)
changed = true;
}
}
}
}
}
return followMap;
}
/**
* Returns the first elements accepted by the given production rule.
*
* @param rules
* the production rule.
* @return the first set.
*/
public static Set<String> first(Rule[] rules) {
return first(rules, new HashSet<String>());
}
/**
* Utilized to avoid infinite loops.
*/
private static Set<String> first(Rule[] rules, Set<String> visited) {
Set<String> curr = new HashSet<String>();
for (Rule rule : rules) {
// We can just skip in this case right? Since we are adding nothing
// new to the rules.
if (visited.contains(rule.getName()))
continue;
if (curr.contains(LL1Parser.EPSILON))
curr.remove(LL1Parser.EPSILON);
if (rule.isTerminal()) {
curr.add(rule.getName());
} else {
Set<String> newVisited = new HashSet<String>(visited);
newVisited.add(rule.getName());
for (Rule[] newRules : rule.getRules()) {
curr.addAll(first(newRules, newVisited));
}
}
if (!curr.contains(LL1Parser.EPSILON))
break;
}
return curr;
}
//
// TESTING
//
public static void main(String[] args) throws IOException, ParseException {
RuleParser.parse("test/sample/grammar.txt");
Rule[] rules = RuleParser.rules.toArray(new Rule[0]);
LL1Parser parse = ParserGenerator.generateParser(rules);
FileInputStream fis = new FileInputStream(new File(
"test/sample/script.txt"));
AST<String> syn = parse.parse(fis);
System.out.println(syn.toString());
;
}
}