/**
* Author: Georg Hofferek <georg.hofferek@iaik.tugraz.at>
*/
package at.iaik.suraq.parser;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import at.iaik.suraq.exceptions.ParseError;
import at.iaik.suraq.sexp.SExpression;
import at.iaik.suraq.sexp.Token;
/**
* A simple parser for LISP-like S-expressions.
*
* @author Georg Hofferek <georg.hofferek@iaik.tugraz.at>
*/
public class SExpParser extends Parser {
/**
*
*/
private static final long serialVersionUID = 1L;
/**
* The current state of this parser. If <code>true</code> it's processing a
* comment.
*/
private boolean commentState = false;
/**
* Indicates whether or not the parser is processing a quoted token at the
* moment.
*/
private boolean quotedToken = false;
/**
* The source string to be parsed into an S-expression.
*/
private final List<String> sourceLines;
/**
* The current expression the parser is working on.
*/
private SExpression currentExpr = null;
/**
* The current line number, or 0 if none.
*/
private int currentLineNumber = 0;
/**
* The current line.
*/
private String currentLine = null;
/**
* The current column number.
*/
private int currentColumnNumber = 0;
/**
* The string (buffer) representation of the current token.
*/
private StringBuffer currentToken = null;
/**
* The (implicit) root of the parse tree.
*/
private SExpression rootExpr = null;
/**
* A stack of parent expressions of the current expression.
*/
private Stack<SExpression> parentExpr;
/**
* Creates a parser to parse the given string. If <code>source</code> is
* <code>null</code> it is treated like the empty string.
*
* @param source
* the string to parse.
*/
public SExpParser(String source) {
if (source == null)
source = "";
String[] stringArray = source.split("\n");
sourceLines = new ArrayList<String>();
for (String string : stringArray) {
sourceLines.add(string);
}
}
/**
* Constructs a new <code>SExpParser</code>, to parse the given file. The
* file is read during construction of this parser object.
*
* @param sourceFile
* the file to read.
* @throws IOException
* if an I/O exception occurs during reading the file
* @throws FileNotFoundException
* if the given file cannot be found/read.
*/
public SExpParser(File sourceFile) throws IOException,
FileNotFoundException {
FileReader reader = new FileReader(sourceFile);
BufferedReader bufferedReader = new BufferedReader(reader);
sourceLines = new ArrayList<String>();
String currentLine = bufferedReader.readLine();
while (currentLine != null) {
sourceLines.add(currentLine);
currentLine = bufferedReader.readLine();
}
bufferedReader.close();
reader.close();
}
/**
* Constructs a new <code>SExpParser</code>.
*
* @param input
* @throws IOException
*/
public SExpParser(BufferedReader input) throws IOException {
sourceLines = new ArrayList<String>();
String currentLine = input.readLine();
while (currentLine != null) {
sourceLines.add(currentLine);
currentLine = input.readLine();
}
input.close();
}
/**
* @return an array containing all the source lines.
*/
public String[] getSourceLines() {
String[] array = { "" };
return (sourceLines.toArray(array));
}
/**
* Parses the input specified at construction time. The parsed s-expression
* is stored in <code>rootExpr</code>.
*
* @throws ParseError
* if parsing fails.
*/
@Override
public void parse() throws ParseError {
rootExpr = new SExpression();
parentExpr = new Stack<SExpression>();
parentExpr.push(rootExpr);
currentExpr = null;
while (++currentLineNumber <= sourceLines.size()) {
currentLine = sourceLines.get(currentLineNumber - 1);
currentColumnNumber = 0;
commentState = false;
if (currentToken != null)
storeToken();
if (quotedToken) {
assert (currentToken != null);
currentToken.append('\n');
}
for (char character : currentLine.toCharArray()) {
currentColumnNumber++;
if (character == ';') // start of a comment
commentState = true;
if (commentState) // ignore rest of line
continue;
if (character == '"')
throw new ParseError(currentLineNumber,
currentColumnNumber, currentLine,
"Found '\"'. String literals currently not supported!");
if (character == '(' && !quotedToken) { // start of a
// subexpression
if (currentToken != null)
storeToken();
if (currentExpr != null)
parentExpr.push(currentExpr);
currentExpr = new SExpression();
continue;
}
if (character == ')' && !quotedToken) { // end of a
// subexpression
if (currentToken != null)
storeToken();
if (currentExpr == null || parentExpr.size() < 1)
throw new ParseError(currentLineNumber,
currentColumnNumber, currentLine,
"Unmatched \")\".");
else {
currentExpr.setLineNumber(currentLineNumber);
currentExpr.setColumnNumber(currentColumnNumber);
parentExpr.peek().addChild(currentExpr);
if (parentExpr.size() == 1) { // only the root
// expression is left.
currentExpr = null;
} else {
currentExpr = parentExpr.pop();
}
}
continue;
}
if (character == ' ' || character == '\t' || character == '\n'
|| character == '\r') { // whitespace
if (currentToken == null) // no current token, just ignore
// the whitespace
continue;
else {
if (quotedToken) // we are in a quoted token. Whitespace
// belongs to token.
currentToken.append(character);
else
// this whitespace ends the token. Store it.
storeToken();
}
continue;
}
if (character == '|') {
if (currentToken == null) { // no current token --> start
// new quoted token
quotedToken = true;
currentToken = new StringBuffer();
} else {
if (quotedToken) {
// the end of the quoted token. Store it.
storeToken();
quotedToken = false;
} else { // found a | in a non-quoted token. --> Error
throw new ParseError(currentLineNumber,
currentColumnNumber, currentLine,
"Unexpected '|'.");
}
}
continue;
}
// We are dealing with an "ordinary" character. So either just
// append it to the current token or start a new token.
if (currentToken == null)
currentToken = new StringBuffer();
currentToken.append(character);
continue;
}
}
if (currentToken != null) {
if (!quotedToken)
storeToken();
else
// end of input while waiting for closing '"' of quoted token
throw new ParseError(sourceLines.size(), "Missing '\"'.");
}
if (currentExpr != null) { // end of input while waiting for closing ")"
throw new ParseError(sourceLines.size(), "Missing \")\".");
}
// The End. Parsing was successful
this.parsingSuccessfull = true;
}
/**
* Stores the current token in the parse tree.
*/
private void storeToken() {
if (currentToken.charAt(0) == '|') {
assert (currentToken.charAt(currentToken.length() - 1) == '|');
currentToken = new StringBuffer(currentToken.subSequence(1,
currentToken.length()));
}
if (currentExpr != null)
currentExpr.addChild(Token.generate(currentToken,
currentLineNumber, currentColumnNumber));
else {
assert (parentExpr.size() == 1);
parentExpr.peek().addChild(
Token.generate(currentToken, currentLineNumber,
currentColumnNumber));
}
currentToken = null;
}
/**
* Returns a deep copy of the root expression determined by this parser, or
* <code>null</code>, if parsing was not successful (or not even attempted).
*
* @return a (deep) copy of the <code>rootExpr</code>, or <code>null</code>
* if parsing did not complete successfully
*/
public SExpression getRootExpr() {
if (parsingSuccessfull) {
assert (rootExpr != null);
return rootExpr.deepCopy();
} else {
return null;
}
}
}