package project.phase2.ll1parsergenerator;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import project.phase2.ll1parsergenerator.dfastuff.Parser;
/**
* The representation of an LL1Parser.
*
*/
public class LL1Parser {
/**
* Names of the special-cased terminal tokens (epsilon, regular expression,
* ASCII string, identifier). Note that EPSILON is represented by a null
* name, so it must be compared with == rather than equals().
*/
public static final String EPSILON = null;
public static final String REG_EX = "REGEX";
public static final String ASCII = "ASCII-STR";
public static final String ID = "ID";
/**
* Whether or not the scanning DFA should be minimized. The value is read
* when an LL1Parser is constructed and handed to the DFAScanner
* constructor. This will significantly increase preprocessing time.
*/
public static boolean MINIMIZE_SCANNER = true;
/**
* The ending token used to tell us that we are done parsing.
*/
public static final String END = "$";
/**
* The non-terminal rules known to this parser, keyed by rule name.
*/
private Map<String, Rule> mRules;
/**
* The transition (parse) table: rule name -> (token label -> index of the
* production of that rule to expand).
*/
private Map<String, Map<String, Integer>> mParseTable;
/**
* The valid tokens scanner for this parser. It is used to label the next
* token in the input while parsing.
*/
private DFAScanner mScanner;
/**
* The name of the start rule.
*/
private String mStartRule;
//
// CTOR
//
/**
 * Creates an empty parser and registers the built-in token classes with the
 * scanner, in this order: ASCII strings, regular expressions, identifiers
 * and the end marker.
 */
public LL1Parser() {
    mRules = new HashMap<String, Rule>();
    mParseTable = new HashMap<String, Map<String, Integer>>();
    mScanner = new DFAScanner(MINIMIZE_SCANNER);

    final String letter = "[a-zA-Z]";
    final String digit = "[0-9]";
    // Character class spanning 0x20 (space) through 0x7e (tilde).
    final String printable = "[" + (char) 0x20 + "-" + (char) 0x7e + "]";

    // Double-quoted ASCII string and single-quoted regular expression.
    mScanner.addRegex("\\\"([^\"] IN " + printable + ")*\\\"", ASCII);
    mScanner.addRegex("\\\'([^\'] IN " + printable + ")*\\\'", REG_EX);

    // Identifier: one letter followed by up to nine letters, digits or
    // underscores, expressed as nine nested optional groups.
    final String tailOpen = "((" + letter + "|" + digit + "|_)";
    final String tailClose = "|)";
    StringBuilder idPattern = new StringBuilder(letter);
    for (int depth = 0; depth < 9; depth++) {
        idPattern.append(tailOpen);
    }
    for (int depth = 0; depth < 9; depth++) {
        idPattern.append(tailClose);
    }
    mScanner.addRegex(idPattern.toString(), ID);

    mScanner.addRegex(END, END);
}
//
// PUBLIC METHODS
//
/**
 * Adds the given rule to the parser.
 * <p>
 * A non-terminal rule is stored under its name together with an empty parse
 * table row, unless a rule with that name already exists. A terminal rule
 * is registered with the scanner as a literal token, with every character
 * listed in {@code Parser.escape} preceded by a backslash; the built-in
 * terminals (epsilon, ASCII string, end marker, identifier, regular
 * expression) are ignored.
 *
 * @param rule
 *            the rule to add.
 * @return the rule's name when a new non-terminal was stored; {@code null}
 *         for terminals and for non-terminals that were already present.
 */
public String addRule(Rule rule) {
    final String name = rule.getName();

    if (!rule.isTerminal()) {
        if (mRules.containsKey(name)) {
            return null;
        }
        mRules.put(name, rule);
        mParseTable.put(name, new HashMap<String, Integer>());
        return name;
    }

    // EPSILON is null, hence the identity comparison before any equals().
    final boolean builtIn = name == EPSILON || name.equals(ASCII)
            || name.equals(END) || name.equals(ID) || name.equals(REG_EX);
    if (builtIn) {
        return null;
    }

    // Build the scanner pattern: one backslash per matching escape entry,
    // followed by the character itself.
    StringBuilder pattern = new StringBuilder();
    for (int pos = 0; pos < name.length(); pos++) {
        final char current = name.charAt(pos);
        for (int k = 0; k < Parser.escape.length; k++) {
            if (Parser.escape[k] == current) {
                pattern.append("\\");
            }
        }
        pattern.append(current);
    }
    mScanner.addRegex(pattern.toString(), name);
    return null;
}
/**
 * Records a parse table entry: for the named rule, seeing the given token
 * selects the given production index.
 * <p>
 * The entry is ignored when the rule name is unknown or the production
 * index is out of range for that rule. If an entry already exists for the
 * same rule and token, a warning is printed to standard output and the
 * entry is overwritten.
 *
 * @param selection
 *            the entry to add to the parse table.
 */
public void addRuleSelection(RuleSelection selection) {
    if (!mParseTable.containsKey(selection.mRuleName)) {
        return;
    }

    final Rule owner = mRules.get(selection.mRuleName);
    if (selection.mRule < 0 || selection.mRule >= owner.getRules().length) {
        return;
    }

    final Map<String, Integer> row = mParseTable.get(selection.mRuleName);
    if (row.containsKey(selection.mToken)) {
        System.out.println("Warning: Ambiguous grammar detected on: ("
                + selection.mRuleName + ", \"" + selection.mToken
                + "\"). Results may not be correct to grammar specifications.");
    }
    row.put(selection.mToken, selection.mRule);
}
/**
 * Marks the given rule as the grammar's start rule. Only the rule's name is
 * remembered; the rule itself is looked up by that name when parsing
 * begins.
 *
 * @param rule
 *            the rule whose name becomes the start rule.
 */
public void setStartRule(Rule rule) {
    final String startName = rule.getName();
    mStartRule = startName;
}
/**
 * Parses the content of the given stream and returns its abstract syntax
 * tree. A fresh parse stack is created for every call.
 *
 * @param stream
 *            the stream to parse.
 * @return an abstract syntax tree.
 * @throws IOException
 *             if reading from the stream fails.
 * @throws ParseException
 *             if the input does not conform to the grammar.
 */
public AST<String> parse(InputStream stream) throws IOException,
        ParseException {
    return new ParseStack().parse(stream);
}
/**
 * Returns a string representation of this parser: every production of
 * every rule, followed by every parse table entry.
 * <p>
 * Trailing separators are only removed when something was actually written
 * in the corresponding section, so an empty rule set, an empty parse table
 * or a rule without transitions still yields well-formed brackets.
 *
 * @return a string representation.
 */
@Override
public String toString() {
    StringBuilder out = new StringBuilder("LL1Parser:(\nRules:[\n");

    int sectionStart = out.length();
    for (Rule rule : mRules.values()) {
        String ruleName = rule.getName();
        for (Rule[] production : rule.getRules()) {
            out.append(ruleName).append("->");
            for (Rule element : production) {
                out.append(element.getName()).append(' ');
            }
            // Drop trailing whitespace (the space after the last element).
            int end = out.length();
            while (end > 0 && out.charAt(end - 1) <= ' ') {
                end--;
            }
            out.setLength(end);
            out.append(",\n");
        }
    }
    // Remove the final ",\n" only if at least one production was written.
    if (out.length() > sectionStart) {
        out.setLength(out.length() - 2);
    }
    out.append("],\n");

    out.append("\nParse Table:[\n");
    sectionStart = out.length();
    for (Map.Entry<String, Map<String, Integer>> row : mParseTable
            .entrySet()) {
        String ruleName = row.getKey();
        out.append(ruleName).append(": {");
        int rowStart = out.length();
        for (Map.Entry<String, Integer> transition : row.getValue()
                .entrySet()) {
            out.append(ruleName).append('-').append(transition.getKey())
                    .append("->").append(transition.getValue())
                    .append(", ");
        }
        // Remove the final ", " only if the row had transitions.
        if (out.length() > rowStart) {
            out.setLength(out.length() - 2);
        }
        out.append("},\n");
    }
    // Remove the final ",\n" only if at least one row was written.
    if (out.length() > sectionStart) {
        out.setLength(out.length() - 2);
    }
    out.append("])");
    return out.toString();
}
//
// INNER CLASS
//
/**
 * A single parse table entry: for a given rule and lookahead token, the
 * index of the production to expand.
 */
public static class RuleSelection {
    /** Name of the rule this entry belongs to. */
    private String mRuleName;

    /** Token label that triggers the transition. */
    private String mToken;

    /** Index of the production to transition to. */
    private int mRule;

    /**
     * Creates a parse table entry.
     *
     * @param name
     *            the rule name this selection applies to.
     * @param token
     *            the token to transition on.
     * @param rule
     *            the index of the production to transition to.
     */
    public RuleSelection(String name, String token, int rule) {
        this.mRule = rule;
        this.mToken = token;
        this.mRuleName = name;
    }
}
/**
* Helper class used during parsing.
*/
private class ParseStack extends InputStream {
//
// CLASS/INSTANCE DATA
//
/**
* The back-end stack of pending grammar symbols that this class manages.
*/
private Stack<Rule> mStack;
/**
* The abstract syntax tree that this class builds during parsing.
*/
private AST<String> mResult;
/**
* Reader over the input stream being parsed.
*/
private InputStreamReader mStream;
/**
* Push-back buffer, used when characters need to be put back into the
* stream. Characters are prepended when pushed back and taken from the
* front when read, so they are retrieved in the reverse of the order in
* which they were put back.
*/
private String mBuffer;
/**
* The character location in the stream: incremented for every character
* handed out, decremented for every character pushed back. Used as the
* error offset of thrown ParseExceptions.
*/
private int mLocation;
/**
* The line location in the stream, adjusted whenever a newline is handed
* out or pushed back. Currently written but never read.
*/
@SuppressWarnings("unused")
private int mLine;
//
// PUBLIC METHODS
//
/**
 * Parses the given input stream and returns the abstract syntax tree.
 * <p>
 * The stack is seeded with the end marker below the start rule, the start
 * rule is expanded recursively, and finally the remaining input must be
 * nothing but optional whitespace followed by the end of input.
 *
 * @param stream
 *            the stream to parse.
 * @return the resultant tree.
 * @throws IOException
 *             if reading from the stream fails.
 * @throws ParseException
 *             if the input does not conform to the grammar, ends early or
 *             has content left over after the start rule was matched.
 * @throws IllegalStateException
 *             if no start rule has been set or the start rule was never
 *             added to the parser.
 */
public AST<String> parse(InputStream stream) throws IOException,
        ParseException {
    // Fail with a clear message instead of a NullPointerException deep
    // inside the recursive parse.
    Rule startRule = mRules.get(mStartRule);
    if (startRule == null) {
        throw new IllegalStateException(
                "No start rule registered under the name: " + mStartRule);
    }

    mStream = new InputStreamReader(stream);
    mBuffer = "";
    mStack = new Stack<Rule>();
    Rule endRule = new Rule(END);
    endRule.setTerminal(true);
    mStack.push(endRule);
    mStack.push(startRule);
    mResult = new AST<String>();
    mResult.setRoot(parse());

    // Skip trailing whitespace before checking for the end of input, just
    // like every terminal match skips leading whitespace; otherwise input
    // that ends with a newline would be rejected.
    char next;
    do {
        next = getNextCharacter();
    } while (Character.isWhitespace(next));

    Rule r = mStack.pop();
    if (END.equals(r.getName()) && END.equals(Character.toString(next))) {
        return mResult;
    } else if (mStream.ready()) {
        throw new ParseException(
                "Finished parsing with content remaining in the file.",
                mLocation);
    } else if (!mStack.empty()) {
        throw new ParseException("Input ended unexpectedly.", mLocation);
    } else {
        throw new ParseException("An unexpected error occured.",
                mLocation);
    }
}
//
// PRIVATE METHODS
//
/**
 * Returns the next input character, preferring pushed-back characters over
 * the underlying reader.
 * <p>
 * When the reader is exhausted, the end marker character '$' is returned
 * once and a (char) -1 sentinel is left at the front of the push-back
 * buffer; every later call returns that sentinel without consuming it and
 * without advancing the location counters.
 *
 * @return the next character, '$' at the end of input, or (char) -1 once
 *         the end of input has already been reported.
 */
private char getNextCharacter() throws IOException, ParseException {
    final char sentinel = (char) -1;
    char result;

    if (mBuffer.isEmpty()) {
        result = (char) mStream.read();
        if (result == sentinel) {
            // End of input: park the sentinel and report the end marker.
            mBuffer = sentinel + mBuffer;
            result = '$';
        }
    } else {
        result = mBuffer.charAt(0);
        if (result == sentinel) {
            // The sentinel stays in the buffer and is not counted.
            return result;
        }
        mBuffer = mBuffer.substring(1);
    }

    mLocation += 1;
    if (result == '\n') {
        mLine += 1;
    }
    return result;
}
/**
 * Pushes a character back onto the front of the push-back buffer and
 * rewinds the location counters accordingly.
 *
 * @param c
 *            the character to put back.
 */
private void replaceCharacter(char c) {
    if (c == '\n') {
        mLine -= 1;
    }
    mLocation -= 1;
    mBuffer = c + mBuffer;
}
/**
* Pops the top symbol off the parse stack and matches it against the input,
* returning the AST node built for it.
* <p>
* Terminals are matched directly against the character stream after
* skipping leading whitespace: epsilon consumes nothing, the end marker
* yields null, ASCII strings and regular expressions are read up to their
* closing quote, identifiers are read greedily up to 10 characters, and any
* other terminal is matched literally against its name.
* <p>
* Non-terminals use the label of the next token to pick a production from
* the parse table, push that production onto the stack and recurse once per
* production symbol, inserting each result as a child node.
*
* @return the node for the matched symbol, or null for the end marker.
* @throws IOException
* if reading the input fails.
* @throws ParseException
* if the input does not match the expected symbol, or no production exists
* for the next token.
*/
private ASTNode<String> parse() throws IOException, ParseException {
Rule currRule = mStack.pop();
if (currRule.isTerminal()) {
String currName = currRule.getName();
ASTNode<String> ret;
// EPSILON is null, so it is compared by identity; it consumes no input.
if (currName == EPSILON) {
ret = new ASTNode<String>(EPSILON);
ret.setTerminal(true);
return ret;
}
// Remove leading whitespace.
Character next;
while (Character.isWhitespace((next = getNextCharacter()))
&& (next != (char) -1))
;
// Number of characters consumed for this token so far; subtracted from
// mLocation when reporting errors.
int backTrack = 1;
if (currName.equals(END)) {
if (!END.equals("" + next))
throw new ParseException(
"Expected end of input. Instead got: "
+ Character.toString(next), mLocation
- backTrack);
// The end marker contributes no node to the tree.
return null;
} else if (currName.equals(ASCII)) {
if (next != '\"')
throw new ParseException(
"Expected ASCII string enclosed with \"\"\".",
mLocation - backTrack);
// Collect everything up to the closing quote; the quotes themselves
// are not part of the node value.
String asc = "";
while ((next = getNextCharacter()) != '\"'
&& (next != (char) -1)) {
asc += next;
backTrack++;
}
// (char) -1 means the input ended before the closing quote.
if (next == (char) -1)
throw new ParseException(
"Expected ASCII string enclosed with \"\"\".",
mLocation - backTrack);
ret = new ASTNode<String>(asc);
ret.setTerminal(true);
return ret;
} else if (currName.equals(REG_EX)) {
if (next != '\'')
throw new ParseException(
"Expected regular expression enclosed with \"\'\".",
mLocation - backTrack);
// Collect everything up to the closing single quote.
String reg = "";
while ((next = getNextCharacter()) != '\''
&& (next != (char) -1)) {
reg += next;
backTrack++;
}
// (char) -1 means the input ended before the closing quote.
if (next == (char) -1)
throw new ParseException(
"Expected regular expression enclosed with \"\'\".",
mLocation - backTrack);
// Validate the regular expression by building it; the result is
// discarded and only a failure matters here.
try {
Parser.fromString(reg);
} catch (Exception e) {
throw new ParseException(
"An error occured while parsing the given regular expression: "
+ reg, mLocation - backTrack);
}
ret = new ASTNode<String>(reg);
ret.setTerminal(true);
return ret;
} else if (currName.equals(ID)) {
if (!Character.isLetter(next))
throw new ParseException(
"Identifiers must begin with a letter.",
mLocation - backTrack);
// Greedily extend the identifier with letters, digits and underscores,
// up to a total length of 10 characters.
String id = "" + next;
while ((id.length() < 10)
&& (Character
.isLetterOrDigit((next = getNextCharacter())) || next == '_')) {
id += next;
}
// Replace a character if we determined it was not part of
// our identifier.
if (!(Character.isLetterOrDigit(next) || next == '_'))
replaceCharacter(next);
ret = new ASTNode<String>(id);
ret.setTerminal(true);
return ret;
} else {
// Literal terminal: read exactly as many characters as the terminal's
// name has and compare them with the name.
String curr = "" + next;
while (curr.length() < currName.length()) {
curr += getNextCharacter();
backTrack++;
}
if (!curr.equals(currName))
throw new ParseException(
"Unexpected token encounterd: " + curr
+ "; Expected: " + currName, mLocation
- backTrack);
ret = new ASTNode<String>(curr);
ret.setTerminal(true);
return ret;
}
} else {
// Non-terminal: choose the production from the parse table based on the
// label of the next token.
String type = determineNextTokenType();
if (!mParseTable.get(currRule.getName()).containsKey(type)) {
throw new ParseException(
"No Grammar Rule Found for Token Type: " + type,
mLocation);
} else {
int rule = mParseTable.get(currRule.getName()).get(type);
Rule[] prodRule = currRule.getRules()[rule];
// Push the production right-to-left so its first symbol ends up on top
// of the stack.
for (int i = 0; i < prodRule.length; i++) {
mStack.push(prodRule[prodRule.length - (i + 1)]);
}
ASTNode<String> ret = new ASTNode<String>(
currRule.getName());
// Each recursive call pops and matches one symbol of the production.
for (int i = 0; i < prodRule.length; i++) {
ret.insert(parse());
}
// Grammar-specific semantic checks. NOTE(review): these rely on the
// child layout of the "<file-names>" and "recursivereplace" productions
// defined in the grammar, which is not visible here - confirm the
// indices against the grammar.
if ("<file-names>".equals(currRule.getName())) {
String src = ret.getChildren().get(0).getChildren()
.get(0).getValue();
String dest = ret.getChildren().get(2).getChildren()
.get(0).getValue();
if (src.equals(dest))
throw new ParseException(
"Source and destination file same in Replace or Recursive Replace.",
mLocation);
} else if (prodRule.length > 0
&& "recursivereplace".equals(prodRule[0].getName())) {
String regex = ret.getChildren().get(1).getValue();
String ascii = ret.getChildren().get(3).getValue();
if (regex.equals(ascii))
throw new ParseException(
"Replace detection and replace target are same in Recursive Replace.",
mLocation);
}
return ret;
}
}
}
/**
 * Determines the label of the next token without consuming it.
 * <p>
 * Leading whitespace is skipped, the scanner labels the token by reading
 * from this stack, and everything the scanner consumed is pushed back
 * afterwards. When the scanner reports several labels joined by '+', the
 * first label other than ID wins; if all of them are ID, the first label is
 * used.
 *
 * @return the label of the upcoming token.
 * @throws IOException
 *             if reading the input fails.
 * @throws ParseException
 *             if the scanner does not recognize the upcoming token.
 */
private String determineNextTokenType() throws IOException,
        ParseException {
    // Skip leading whitespace, then put the first real character back so
    // the scanner sees it.
    char lookahead = getNextCharacter();
    while (Character.isWhitespace(lookahead) && lookahead != (char) -1) {
        lookahead = getNextCharacter();
    }
    replaceCharacter(lookahead);

    String label;
    try {
        label = mScanner.labelToken(this);
    } catch (ParseException ex) {
        // Restore the input before reporting, so the location is accurate.
        restoreScannerBuffer();
        throw new ParseException("Token Unrecognized by Scanner: "
                + ex.getMessage(), mLocation);
    }
    restoreScannerBuffer();

    String[] candidates = label.split("\\+");
    if (candidates.length <= 1) {
        return label;
    }
    for (String candidate : candidates) {
        if (!candidate.equals(ID)) {
            return candidate;
        }
    }
    return candidates[0];
}

/**
 * Pushes the characters held in the scanner's buffer back onto the input,
 * last character first, so they are read again in their original order.
 */
private void restoreScannerBuffer() {
    String consumed = mScanner.getBuffer();
    for (int idx = consumed.length() - 1; idx >= 0; idx--) {
        replaceCharacter(consumed.charAt(idx));
    }
}
/**
 * Exposes the character stream of this parse stack as an InputStream so
 * that the scanner can read from it.
 * <p>
 * The value is the next character widened to an int. As written, the end of
 * input is therefore reported as '$' followed by the value of (char) -1
 * rather than by a negative return value.
 *
 * @return the next character as an int.
 * @throws IOException
 *             if reading fails, or wrapping a ParseException raised while
 *             fetching the character.
 * @see java.io.InputStream#read()
 */
@Override
public int read() throws IOException {
    int nextChar;
    try {
        nextChar = getNextCharacter();
    } catch (ParseException cause) {
        throw new IOException(
                "Error while attempting to determine next token.", cause);
    }
    return nextChar;
}
}
}