package project.phase2.ll1parsergenerator;
import java.io.File;
import java.util.LinkedList;
import java.util.List;
import java.util.Scanner;
import project.phase2.file.FileIO;
/**
 * Parses the rules of a grammar from a grammar file.
 *
 * The file is expected to contain three sections, in this order, each
 * introduced by a header line: "%% Tokens" (one line of whitespace separated
 * terminal names), "%% Start" (the name of the start rule) and "%% Rules"
 * (one "name = production | production ..." line per rule).
 *
 * All state is kept in static fields, so the result of a previous call to
 * {@link #parse(String)} is replaced by the next call.
 */
public class RuleParser {
    /** Not assigned anywhere in this class; kept because it is public. */
    public static String[] predef;

    /** Rules collected by the most recent call to {@link #parse(String)}. */
    public static LinkedList<Rule> rules;

    /**
     * Adds the default terminals to the rule list. Also initializes the rules
     * list, discarding whatever an earlier parse left behind.
     */
    private static void initializeTerminals() {
        rules = new LinkedList<Rule>();
        addTerminal("E");
        addTerminal("REGEX");
        addTerminal("ASCII-STR");
        addTerminal("ID");
    }

    /**
     * Appends a new terminal rule to the rule list.
     *
     * @param s
     *            name of the terminal
     */
    private static void addTerminal(String s) {
        Rule r = new Rule(s);
        r.setTerminal(true);
        rules.add(r);
    }

    /**
     * Looks up a rule by name in the rule list.
     *
     * @param name
     *            rule name to search for
     * @return the matching rule, or null when no rule has that name
     */
    private static Rule findRule(String name) {
        for (Rule candidate : rules) {
            if (name.equals(candidate.getName())) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Consumes lines up to and including the first one that starts with the
     * given section header.
     *
     * @param input
     *            scanner positioned somewhere before the header
     * @param header
     *            header text, e.g. "%% Tokens"
     * @throws java.util.NoSuchElementException
     *             if the input ends before the header is found
     */
    private static void skipToSection(Scanner input, String header) {
        while (input.hasNextLine()) {
            if (input.nextLine().startsWith(header)) {
                return;
            }
        }
        throw new java.util.NoSuchElementException("Grammar file has no \""
                + header + "\" section");
    }

    /**
     * Returns the next line that contains something other than whitespace.
     *
     * @param input
     *            scanner to read from
     * @param what
     *            description of the expected line, used in the error message
     * @return the first non-blank line, unmodified
     * @throws java.util.NoSuchElementException
     *             if the input ends before such a line is found
     */
    private static String nextNonBlankLine(Scanner input, String what) {
        while (input.hasNextLine()) {
            String line = input.nextLine();
            if (line.trim().length() != 0) {
                return line;
            }
        }
        throw new java.util.NoSuchElementException(
                "Grammar file ended before the " + what + " was found");
    }

    /**
     * Parses the grammar file at the given path and generates the set of
     * rules. The file must have the predefined symbols, start state, and
     * grammar sections as described on the class.
     *
     * If no rule carries the start symbol's name, a message is printed and the
     * JVM is terminated with a non-zero status.
     *
     * @param path
     *            location of the grammar file
     * @return the parsed rules (the same list that is stored in {@link #rules});
     *         the rule named "E" has its name set to null
     * @throws RuntimeException
     *             if the file cannot be read, a section is missing, or a
     *             production cannot be matched against the known rule names
     */
    public static List<Rule> parse(String path) {
        Scanner input;
        try {
            input = new Scanner(FileIO.readEntireFile(new File(path)));
        } catch (Exception e) {
            // Fail here with the real cause instead of a later
            // NullPointerException on the unopened scanner.
            throw new RuntimeException("Cannot read grammar file: " + path, e);
        }
        try {
            initializeTerminals();

            // Predefined token line
            skipToSection(input, "%% Tokens");
            predefined(nextNonBlankLine(input, "token list"));

            // Start symbol; rule names never contain whitespace, so surrounding
            // whitespace on this line can only prevent a match.
            skipToSection(input, "%% Start");
            String start = nextNonBlankLine(input, "start symbol").trim();

            // Rules begin.
            skipToSection(input, "%% Rules");
            readRules(input);

            // Set start rule
            boolean found = false;
            for (Rule r : rules) {
                if (start.equals(r.getName())) {
                    r.setStart(true);
                    found = true;
                }
            }
            if (!found) {
                System.out.println("Start state is " + start
                        + " but no such rule found.");
                // This is a failure, so do not report success to the caller's shell.
                System.exit(1);
            }

            for (Rule r : rules) {
                if ("E".equals(r.getName())) {
                    //TODO epsilon symbol, also change it in LL1Parser
                    r.setName(null);
                }
            }
        } finally {
            input.close();
        }
        return rules;
    }

    /**
     * Returns the given text with every whitespace character removed.
     *
     * @param text
     *            text to strip
     * @return text without whitespace
     */
    private static String stripWhitespace(String text) {
        StringBuilder kept = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (!Character.isWhitespace(c)) {
                kept.append(c);
            }
        }
        return kept.toString();
    }

    /**
     * Splits a line into the rule's name and production. The split happens at
     * the first '='; all whitespace is removed from both parts.
     *
     * @param s
     *            one line of the rules section
     * @return a two element array {name, production}, or null if the line is
     *         null, blank, or starts with '%'
     * @throws RuntimeException
     *             if a non-blank line contains no '='
     */
    private static String[] splitLine(String s) {
        if (s == null || s.length() == 0) {
            return null;
        }
        if (s.charAt(0) == '%') {
            return null;
        }
        int equalsAt = s.indexOf('=');
        if (equalsAt < 0) {
            if (stripWhitespace(s).length() == 0) {
                return null;
            }
            throw new RuntimeException("Rule line has no '=': " + s);
        }
        return new String[] { stripWhitespace(s.substring(0, equalsAt)),
                stripWhitespace(s.substring(equalsAt + 1)) };
    }

    /**
     * Reads in the predefined symbol line. Every whitespace separated token
     * that is not already in the rule list is added as a terminal.
     *
     * @param s
     *            the token line
     */
    private static void predefined(String s) {
        for (String token : s.split("\\s+")) {
            // Leading whitespace produces an empty first element.
            if (token.length() == 0) {
                continue;
            }
            if (findRule(token) == null) {
                addTerminal(token);
            }
        }
    }

    /**
     * Reads the remaining lines of the input as rules. This is done in two
     * passes so all rule names are known before any production is matched
     * against them.
     *
     * @param input
     *            scanner positioned just after the "%% Rules" header
     */
    private static void readRules(Scanner input) {
        // Pass 1: register every rule name in file order. The lines are stored
        // last-to-first because productions are attached bottom-up below.
        LinkedList<String[]> linesLastFirst = new LinkedList<String[]>();
        while (input.hasNextLine()) {
            String[] line = splitLine(input.nextLine());
            if (line == null) {
                continue;
            }
            if (findRule(line[0]) == null) {
                rules.add(new Rule(line[0]));
            }
            linesLastFirst.addFirst(line);
        }

        // Pass 2: attach the productions, starting with the last line of the
        // file so that a rule spread over several lines receives its
        // productions in the same order as before.
        for (String[] line : linesLastFirst) {
            Rule owner = findRule(line[0]);
            LinkedList<Rule> production = new LinkedList<Rule>();
            String rest = line[1];
            // The production text contains no whitespace (see splitLine), so
            // only the alternative separator needs special handling.
            while (rest.length() != 0) {
                if (rest.charAt(0) == '|') {
                    rest = rest.substring(1);
                    owner.addProductionRule(production
                            .toArray(new Rule[production.size()]));
                    production = new LinkedList<Rule>();
                } else {
                    rest = matchRule(rest, production);
                }
            }
            owner.addProductionRule(production.toArray(new Rule[production
                    .size()]));
        }
    }

    /**
     * Matches a rule name at the start of the production text to a rule in the
     * list. The longest matching name wins; among equally long names the one
     * earliest in the list wins.
     *
     * @param s
     *            remaining production text, without whitespace
     * @param production
     *            list the matched rule is appended to
     * @return s with the matched rule name removed from the front
     * @throws RuntimeException
     *             if no rule name is a prefix of s
     */
    private static String matchRule(String s, LinkedList<Rule> production) {
        Rule longestFound = null;
        for (Rule r : rules) {
            String ruleName = r.getName();
            // An empty name would match without consuming any input and the
            // caller would never make progress.
            if (ruleName.length() == 0 || !s.startsWith(ruleName)) {
                continue;
            }
            if (longestFound == null
                    || ruleName.length() > longestFound.getName().length()) {
                longestFound = r;
            }
        }
        if (longestFound == null) {
            throw new RuntimeException("Cannot be matched: " + s);
        }
        production.add(longestFound);
        return s.substring(longestFound.getName().length());
    }
}