/*
* tuProlog - Copyright (C) 2001-2002 aliCE team at deis.unibo.it
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package alice.tuprolog;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.NoSuchElementException;
import java.util.regex.Pattern;
/**
* This class defines a parser of prolog terms and sentences.
* <p/>
* BNF part 2: Parser
* term ::= exprA(1200)
* exprA(n) ::= exprB(n) { op(yfx,n) exprA(n-1) |
* op(yf,n) }*
* exprB(n) ::= exprC(n-1) { op(xfx,n) exprA(n-1) |
* op(xfy,n) exprA(n) |
* op(xf,n) }*
* // exprC is called parseLeftSide in the code
* exprC(n) ::= '-' integer | '-' float |
* op( fx,n ) exprA(n-1) |
* op( fy,n ) exprA(n) |
* exprA(0)
* exprA(0) ::= integer |
* float |
* atom |
* variable |
* atom'(' exprA(1200) { ',' exprA(1200) }* ')' |
* '[' [ exprA(1200) { ',' exprA(1200) }* [ '|' exprA(1200) ] ] ']' |
* '(' exprA(1200) ')' |
* '{' [ exprA(1200) ] '}'
* op(type,n) ::= atom | { symbol }+
*/
public class Parser implements Iterable<Term> {
/**
 * Immutable pair of a parsed term and the priority of its root operator.
 * The expression routines never modify an instance; they create a new pair
 * whenever the current left side gets wrapped in a bigger term.
 */
private static class IdentifiedTerm {
    /** Priority associated with {@link #result}; the parser uses 0 for terms without a root operator. */
    private final int priority;
    /** The term parsed so far. */
    private final Term result;

    /**
     * @param priority priority of the root operator of <tt>result</tt>
     * @param result the parsed term
     */
    public IdentifiedTerm(int priority, Term result) {
        this.priority = priority;
        this.result = result;
    }
}
/** Operator manager shared by every parser that is not given one of its own. */
private static OperatorManager defaultOperatorManager = new DefaultOperatorManager();
/** Source of the tokens being parsed; assigned by the constructors. */
private Tokenizer tokenizer;
/** Operator table queried for operator priorities; starts as the shared default. */
private OperatorManager opManager = defaultOperatorManager;
/**
 * Creates a parser that reads its text from a stream and resolves
 * operators through the given operator manager.
 *
 * @param op operator manager to use; when <tt>null</tt> the default
 *           operator manager stays in place
 * @param theoryText stream providing the text to parse
 */
public Parser(OperatorManager op, InputStream theoryText) {
    this(theoryText);
    if (null != op) {
        this.opManager = op;
    }
}
/**
 * Creates a parser over a string and resolves operators through the
 * given operator manager.
 *
 * @param op operator manager to use; when <tt>null</tt> the default
 *           operator manager stays in place
 * @param theoryText the text to parse
 */
public Parser(OperatorManager op, String theoryText) {
    this(theoryText);
    if (null != op) {
        this.opManager = op;
    }
}
/**
 * Creates a parser over a string, using the default operator manager.
 *
 * @param theoryText the text to parse
 */
public Parser(String theoryText) {
    this.tokenizer = new Tokenizer(theoryText);
}
/**
* Creating a parser with default operator interpretation.
*/
public Parser(InputStream theoryText) {
tokenizer = new Tokenizer(new BufferedReader(new InputStreamReader(theoryText)));
}
// user interface
/**
 * Returns an iterator over the terms of the text held by this parser.
 * The iterator pulls its terms from this parser, so it advances the same
 * token stream that {@link #nextTerm(boolean)} reads from.
 */
@Override
public Iterator<Term> iterator() {
    final Iterator<Term> terms = new TermIterator(this);
    return terms;
}
/**
 * Reads the next term from the underlying token stream.
 *
 * @param endNeeded <tt>true</tt> if the term must be followed by the end
 *                  token (a period), <tt>false</tt> otherwise.
 * @return the parsed term, or <tt>null</tt> when the first token read is
 *         the end of the input.
 * @throws InvalidTermException if a syntax error is found, or if reading
 *                              the input fails with an I/O error.
 */
public Term nextTerm(boolean endNeeded) throws InvalidTermException {
    try {
        Token first = tokenizer.readToken();
        if (first.isEOF()) {
            return null;
        }
        // The token was only peeked at: give it back to the expression parser.
        tokenizer.unreadToken(first);

        Term parsed = expr(false);
        if (parsed == null) {
            throw new InvalidTermException("The parser is unable to finish.");
        }
        if (endNeeded) {
            Token terminator = tokenizer.readToken();
            if (terminator.getType() != Tokenizer.END) {
                throw new InvalidTermException("The term " + parsed + " is not ended with a period.");
            }
        }
        parsed.resolveTerm();
        return parsed;
    } catch (IOException ex) {
        throw new InvalidTermException("An I/O error occured.");
    }
}
/**
 * Static service that parses a string holding exactly one term, using the
 * default operator manager.
 *
 * @param st textual representation of the term
 * @return the parsed term
 * @throws InvalidTermException if the string cannot be read as one term
 */
public static Term parseSingleTerm(String st) throws InvalidTermException {
    // A null operator manager makes the parser keep its default one.
    return parseSingleTerm(st, (OperatorManager) null);
}
/**
 * Static service that parses a string holding exactly one term, resolving
 * operators through the given operator manager.
 *
 * @param st textual representation of the term
 * @param op operator manager to use; <tt>null</tt> selects the default one
 * @return the parsed term
 * @throws InvalidTermException if the string is empty, contains a syntax
 *                              error, has tokens left after the term, or
 *                              an I/O error occurs while reading it
 */
public static Term parseSingleTerm(String st, OperatorManager op) throws InvalidTermException {
    try {
        Parser parser = new Parser(op, st);
        Tokenizer tokens = parser.tokenizer;

        Token first = tokens.readToken();
        if (first.isEOF()) {
            throw new InvalidTermException("Term starts with EOF");
        }
        // The token was only peeked at: give it back to the expression parser.
        tokens.unreadToken(first);

        Term parsed = parser.expr(false);
        if (parsed == null) {
            throw new InvalidTermException("Term is null");
        }
        // Nothing may follow the term: the next token has to be the end of input.
        Token trailing = tokens.readToken();
        if (!trailing.isEOF()) {
            throw new InvalidTermException("The enitire string could not be read as one term");
        }
        parsed.resolveTerm();
        return parsed;
    } catch (IOException ex) {
        throw new InvalidTermException("An I/O error occured");
    }
}
/**
 * @return the line number currently reported by the tokenizer
 */
public int getCurrentLine() {
    final int line = tokenizer.lineno();
    return line;
}
// internal parsing procedures
/**
 * Parses a complete expression, accepting operators up to the highest
 * priority ({@code OperatorManager.OP_HIGH}).
 *
 * @param commaIsEndMarker forwarded unchanged to {@code exprA}
 * @return the parsed term, without the priority information
 */
private Term expr(boolean commaIsEndMarker) throws InvalidTermException, IOException {
    IdentifiedTerm top = exprA(OperatorManager.OP_HIGH, commaIsEndMarker);
    return top.result;
}
/**
* Parses an expression whose operators have at most the given priority,
* handling the yfx (infix) and yf (postfix) operator types.
* <p>
* Grammar rule: exprA(n) ::= exprB(n) { op(yfx,n) exprA(n-1) | op(yf,n) }*
*
* @param maxPriority highest operator priority accepted at this level
* @param commaIsEndMarker forwarded to the operator check on each token and
* to the nested expression calls
* @return the parsed term together with the priority recorded for it
* @throws InvalidTermException propagated from the nested parsing routines
* @throws IOException propagated from the tokenizer or the nested parsing routines
*/
private IdentifiedTerm exprA(int maxPriority, boolean commaIsEndMarker) throws InvalidTermException, IOException {
IdentifiedTerm leftSide = exprB(maxPriority, commaIsEndMarker);
//{op(yfx,n) exprA(n-1) | op(yf,n)}*
Token t = tokenizer.readToken();
// Keep extending leftSide for as long as the next token is an operator that fits.
for (; t.isOperator(commaIsEndMarker); t = tokenizer.readToken()) {
int YFX = opManager.opPrio(t.seq, "yfx");
int YF = opManager.opPrio(t.seq, "yf");
// A reading of the operator is discarded (set to -1) when its priority is
// lower than the priority of the left side or higher than the limit of this level.
// NOTE(review): -1 is presumably below OperatorManager.OP_LOW, which is what
// makes the checks further down reject it - confirm against OperatorManager.
if (YF < leftSide.priority || YF > maxPriority) YF = -1;
if (YFX < leftSide.priority || YFX > maxPriority) YFX = -1;
//YFX has priority over YF
if (YFX >= YF && YFX >= OperatorManager.OP_LOW){
// the right operand of a yfx operator must have a strictly lower priority
IdentifiedTerm ta = exprA(YFX-1, commaIsEndMarker);
// exprA in this class either returns an object or throws, so this check is defensive
if (ta != null) {
leftSide = new IdentifiedTerm(YFX, new Struct(t.seq, leftSide.result, ta.result));
continue;
}
}
//either YF has priority over YFX or YFX failed
if (YF >= OperatorManager.OP_LOW) {
leftSide = new IdentifiedTerm(YF, new Struct(t.seq, leftSide.result));
continue;
}
// the operator cannot be applied at this level: stop and leave it to the caller
break;
}
// the last token read does not belong to this expression: push it back
tokenizer.unreadToken(t);
return leftSide;
}
/**
* Parses a left side followed by any xfx, xfy or xf operators whose priority
* does not exceed the given limit.
* <p>
* As implemented here the left side is obtained from parseLeftSide with the
* same priority limit, then each following operator token is tried as xfx,
* xfy and xf in the order set by the priorities found for it.
*
* @param maxPriority highest operator priority accepted at this level
* @param commaIsEndMarker forwarded to the operator check on each token and
* to the nested expression calls
* @return the parsed term together with the priority recorded for it
* @throws InvalidTermException propagated from the nested parsing routines
* @throws IOException propagated from the tokenizer or the nested parsing routines
*/
private IdentifiedTerm exprB(int maxPriority, boolean commaIsEndMarker) throws InvalidTermException, IOException {
//1. op(fx,n) exprA(n-1) | op(fy,n) exprA(n) | expr0
IdentifiedTerm left = parseLeftSide(commaIsEndMarker, maxPriority);
//2.left is followed by either xfx, xfy or xf operators, parse these
Token operator = tokenizer.readToken();
for (; operator.isOperator(commaIsEndMarker); operator = tokenizer.readToken()) {
int XFX = opManager.opPrio(operator.seq, "xfx");
int XFY = opManager.opPrio(operator.seq, "xfy");
int XF = opManager.opPrio(operator.seq, "xf");
//check that no operator has a priority higher than permitted
//or lower than OperatorManager.OP_LOW; such readings are set to -1.
//The comparison with the priority of the left side is done in each branch below.
if (XFX > maxPriority || XFX < OperatorManager.OP_LOW) XFX = -1;
if (XFY > maxPriority || XFY < OperatorManager.OP_LOW) XFY = -1;
if (XF > maxPriority || XF < OperatorManager.OP_LOW) XF = -1;
//XFX
boolean haveAttemptedXFX = false;
if (XFX >= XFY && XFX >= XF && XFX >= left.priority) { //XFX has priority
// the right operand of an xfx operator must have a strictly lower priority
IdentifiedTerm found = exprA(XFX - 1, commaIsEndMarker);
// exprA in this class either returns an object or throws, so the else branch is defensive
if (found != null) {
Struct xfx = new Struct(operator.seq, left.result, found.result);
left = new IdentifiedTerm(XFX, xfx);
continue;
} else
haveAttemptedXFX = true;
}
//XFY
if (XFY >= XF && XFY >= left.priority){ //XFY has priority, or XFX has failed
// the right operand of an xfy operator may have the same priority as the operator
IdentifiedTerm found = exprA(XFY, commaIsEndMarker);
if (found != null) {
Struct xfy = new Struct(operator.seq, left.result, found.result);
left = new IdentifiedTerm(XFY, xfy);
continue;
}
}
//XF
// note: a postfix match returns immediately instead of continuing the loop
if (XF >= left.priority) //XF has priority, or XFX and/or XFY has failed
return new IdentifiedTerm(XF, new Struct(operator.seq, left.result));
//XFX did not have top priority, but XFY failed
if (!haveAttemptedXFX && XFX >= left.priority) {
IdentifiedTerm found = exprA(XFX - 1, commaIsEndMarker);
if (found != null) {
Struct xfx = new Struct(operator.seq, left.result, found.result);
left = new IdentifiedTerm(XFX, xfx);
continue;
}
}
// the operator cannot be applied at this level: stop and leave it to the caller
break;
}
// the last token read does not belong to this expression: push it back
tokenizer.unreadToken(operator);
return left;
}
/**
* Parses and returns a valid 'left side' of an expression.
* If the left side starts with a prefix operator, the operand is parsed with
* exprA: up to the operator's priority minus one for fx, up to the operator's
* priority for fy.
* A '-' immediately followed by a number token is turned into a negative number.
* If the left side does not have a usable prefix it must be an expr0.
*
* @param commaIsEndMarker used when the left side is part of an argument list of expressions
* @param maxPriority operators with a higher priority than this will effectively end the expression
* @return a wrapper of: 1. term correctly structured and 2. the priority of its root operator
* (0 for a number or an expr0)
* @throws InvalidTermException propagated from the nested parsing routines
* @throws IOException propagated from the tokenizer or the nested parsing routines
*/
private IdentifiedTerm parseLeftSide(boolean commaIsEndMarker, int maxPriority) throws InvalidTermException, IOException {
//1. prefix expression
Token f = tokenizer.readToken();
if (f.isOperator(commaIsEndMarker)) {
int FX = opManager.opPrio(f.seq, "fx");
int FY = opManager.opPrio(f.seq, "fy");
// special case: '-' followed by a number token is a negative numeric constant
if (f.seq.equals("-")) {
Token t = tokenizer.readToken();
if (t.isNumber())
return new IdentifiedTerm(0, Parser.createNumber("-" + t.seq));
else
tokenizer.unreadToken(t);
}
//check that no operator has a priority higher than permitted
if (FY > maxPriority) FY = -1;
if (FX > maxPriority) FX = -1;
//FX has priority over FY
boolean haveAttemptedFX = false;
if (FX >= FY && FX >= OperatorManager.OP_LOW){
IdentifiedTerm found = exprA(FX-1, commaIsEndMarker); //op(fx, n) exprA(n - 1)
// exprA in this class either returns an object or throws, so the else branch is defensive
if (found != null)
return new IdentifiedTerm(FX, new Struct(f.seq, found.result));
else
haveAttemptedFX = true;
}
//FY has priority over FX, or FX has failed
if (FY >= OperatorManager.OP_LOW) {
IdentifiedTerm found = exprA(FY, commaIsEndMarker); //op(fy,n) exprA(1200) or op(fy,n) exprA(n)
if (found != null)
return new IdentifiedTerm(FY, new Struct(f.seq, found.result));
}
//FY has priority over FX, but FY failed
if (!haveAttemptedFX && FX >= OperatorManager.OP_LOW) {
IdentifiedTerm found = exprA(FX-1, commaIsEndMarker); //op(fx, n) exprA(n - 1)
if (found != null)
return new IdentifiedTerm(FX, new Struct(f.seq, found.result));
}
}
// not a usable prefix operator: push the token back and let expr0 read it
tokenizer.unreadToken(f);
//2. expr0
return new IdentifiedTerm(0, expr0());
}
/**
 * Parses a term that is not built from an operator.
 * <pre>
 * exprA(0) ::= integer |
 *              float |
 *              variable |
 *              atom |
 *              atom( exprA(1200) { , exprA(1200) }* ) |
 *              '[' [ exprA(1200) { , exprA(1200) }* [ | exprA(1200) ] ] ']' |
 *              '{' [ exprA(1200) ] '}' |
 *              '(' exprA(1200) ')'
 * </pre>
 *
 * @return the parsed term
 * @throws InvalidTermException if a closing delimiter is missing or the
 *                              first token matches none of the alternatives
 * @throws IOException propagated from the tokenizer
 */
private Term expr0() throws InvalidTermException, IOException {
    Token start = tokenizer.readToken();

    // numbers and variables are complete terms on their own
    if (start.isType(Tokenizer.INTEGER)) {
        return Parser.parseInteger(start.seq);
    }
    if (start.isType(Tokenizer.FLOAT)) {
        return Parser.parseFloat(start.seq);
    }
    if (start.isType(Tokenizer.VARIABLE)) {
        return new Var(start.seq);
    }

    // atom, possibly quoted, possibly the functor of a compound term
    boolean atomLike = start.isType(Tokenizer.ATOM)
            || start.isType(Tokenizer.SQ_SEQUENCE)
            || start.isType(Tokenizer.DQ_SEQUENCE);
    if (atomLike) {
        if (!start.isFunctor()) {
            return new Struct(start.seq);
        }
        String functor = start.seq;
        Token opening = tokenizer.readToken();
        if (!opening.isType(Tokenizer.LPAR)) {
            throw new InvalidTermException("bug in parsing process. Something identified as functor misses its first left parenthesis");
        }
        LinkedList<Term> arguments = expr0_arglist();
        Token closing = tokenizer.readToken();
        if (!closing.isType(Tokenizer.RPAR)) {
            throw new InvalidTermException("Missing right parenthesis: (" + arguments + " -> here <-");
        }
        return new Struct(functor, arguments);
    }

    // parenthesised expression
    if (start.isType(Tokenizer.LPAR)) {
        Term inner = expr(false);
        Token closing = tokenizer.readToken();
        if (!closing.isType(Tokenizer.RPAR)) {
            throw new InvalidTermException("Missing right parenthesis: (" + inner + " -> here <-");
        }
        return inner;
    }

    // list: either empty or at least one element
    if (start.isType(Tokenizer.LBRA)) {
        Token peeked = tokenizer.readToken();
        if (peeked.isType(Tokenizer.RBRA)) {
            return new Struct();
        }
        tokenizer.unreadToken(peeked);
        Term list = expr0_list();
        Token closing = tokenizer.readToken();
        if (!closing.isType(Tokenizer.RBRA)) {
            throw new InvalidTermException("Missing right bracket: [" + list + " -> here <-");
        }
        return list;
    }

    // braces: either empty or wrapping one expression
    if (start.isType(Tokenizer.LBRA2)) {
        Token peeked = tokenizer.readToken();
        if (peeked.isType(Tokenizer.RBRA2)) {
            return new Struct("{}");
        }
        tokenizer.unreadToken(peeked);
        Term content = expr(false);
        Token closing = tokenizer.readToken();
        if (!closing.isType(Tokenizer.RBRA2)) {
            throw new InvalidTermException("Missing right braces: {" + content + " -> here <-");
        }
        return new Struct("{}", content);
    }

    throw new InvalidTermException("The following token could not be identified: " + start.seq);
}
/**
 * Parses the elements of a non-empty list, up to but not including the
 * closing bracket: exprA(1200) { ',' exprA(1200) }* [ '|' exprA(1200) ].
 * <p>
 * The elements are collected in a loop and the list cells are built
 * afterwards from the last element backwards, so the depth of the call
 * stack no longer grows with the number of elements.
 *
 * @return the list term made of the parsed elements and tail
 * @throws InvalidTermException if an element is not followed by ',', '|' or ']'
 * @throws IOException propagated from the tokenizer
 */
private Term expr0_list() throws InvalidTermException, IOException {
    // elements that are followed by a comma, in source order
    LinkedList<Term> leading = new LinkedList<Term>();
    // innermost cell, holding the last element and the tail
    Term list = null;
    while (list == null) {
        Term head = expr(true);
        Token t = tokenizer.readToken();
        if (",".equals(t.seq)) {
            leading.add(head);
        } else if ("|".equals(t.seq)) {
            list = new Struct(head, expr(true));
        } else if ("]".equals(t.seq)) {
            // the closing bracket is checked by the caller: leave it in the stream
            tokenizer.unreadToken(t);
            list = new Struct(head, new Struct());
        } else {
            throw new InvalidTermException("The expression: " + head + "\nis not followed by either a ',' or '|' or ']'.");
        }
    }
    // wrap the remaining elements around the innermost cell, last one first
    Iterator<Term> reversed = leading.descendingIterator();
    while (reversed.hasNext()) {
        list = new Struct(reversed.next(), list);
    }
    return list;
}
/**
 * Parses the arguments of a compound term, up to but not including the
 * closing parenthesis: exprA(1200) { ',' exprA(1200) }*.
 * <p>
 * The arguments are collected in a loop, so the depth of the call stack
 * no longer grows with the number of arguments.
 *
 * @return the arguments in source order; never empty
 * @throws InvalidTermException if an argument is not followed by ',' or ')'
 * @throws IOException propagated from the tokenizer
 */
private LinkedList<Term> expr0_arglist() throws InvalidTermException, IOException {
    LinkedList<Term> arguments = new LinkedList<Term>();
    while (true) {
        Term head = expr(true);
        arguments.add(head);
        Token t = tokenizer.readToken();
        if (",".equals(t.seq)) {
            continue;
        }
        if (")".equals(t.seq)) {
            // the closing parenthesis is checked by the caller: leave it in the stream
            tokenizer.unreadToken(t);
            return arguments;
        }
        throw new InvalidTermException("The argument: " + head + "\nis not followed by either a ',' or ')'.\nline: " + tokenizer.lineno());
    }
}
// commodity methods to parse numbers
/**
 * Builds an integer number term from its decimal representation.
 * Values that fit in a Java <tt>int</tt> become an <tt>Int</tt>, all the
 * others a <tt>Long</tt>.
 *
 * @param s decimal representation of the number
 * @return the number term holding the parsed value
 * @throws NumberFormatException if <tt>s</tt> cannot be parsed as a Java long
 */
static Number parseInteger(String s) {
    long num = java.lang.Long.parseLong(s);
    // Both bounds are inclusive: Integer.MIN_VALUE and Integer.MAX_VALUE fit in an int.
    if (num >= Integer.MIN_VALUE && num <= Integer.MAX_VALUE) {
        return new Int((int) num);
    }
    return new Long(num);
}
/**
 * Builds a floating point number term from its textual representation.
 *
 * @param s textual representation of the number
 * @return the number term holding the parsed value
 * @throws NumberFormatException if <tt>s</tt> cannot be parsed as a Java double
 */
static Double parseFloat(String s) {
    return new Double(java.lang.Double.parseDouble(s));
}
/**
 * Builds a number term from its textual representation, trying the
 * integer reading first and falling back to the floating point one.
 *
 * @param s textual representation of the number
 * @return the number term holding the parsed value
 * @throws NumberFormatException if <tt>s</tt> is neither a valid integer
 *                               nor a valid floating point number
 */
static Number createNumber(String s) {
    try {
        return parseInteger(s);
    } catch (NumberFormatException notAnInteger) {
        // Not readable as a Java long (fraction, exponent, or out of range): read it as a float.
        return parseFloat(s);
    }
}
/**
 * Checks whether a string has the shape of an unquoted Prolog atom as
 * recognised here: either the single character <tt>!</tt>, or a lowercase
 * ASCII letter followed by any number of ASCII letters, digits or
 * underscores.
 *
 * @param s the string to check; may be <tt>null</tt>
 * @return true if the String could be a prolog atom, false otherwise
 *         (including when <tt>s</tt> is <tt>null</tt>)
 */
public static boolean isAtom(String s) {
    return s != null && ATOM_PATTERN.matcher(s).matches();
}

/** Shape of an unquoted atom; compiled once and never replaced. */
private static final Pattern ATOM_PATTERN = Pattern.compile("(!|[a-z][a-zA-Z_0-9]*)");
//
/**
 * This class represents an iterator of terms from Prolog text embedded
 * in a parser. Note that this class resembles a generator more than an
 * iterator type. In fact, the constructor, {@link TermIterator#next()} and
 * {@link TermIterator#hasNext()} all throw {@link InvalidTermException} if
 * the next term they are trying to return or check for contains a syntax
 * error; this is because they try to generate the next term instead of
 * just returning it or checking for its existence in a pool of already
 * produced terms.
 */
private static class TermIterator implements Iterator<Term> {
    /** Parser the terms are pulled from. */
    private final Parser parser;
    /** Term already parsed but not handed out yet; null when nothing is buffered. */
    private Term next;

    /**
     * Creates the iterator and immediately parses the first term.
     *
     * @param p parser the terms are pulled from
     */
    TermIterator(Parser p) {
        parser = p;
        next = parser.nextTerm(true);
    }

    /**
     * @return the buffered term, or a newly parsed one if none is buffered
     * @throws NoSuchElementException if the parser has no more terms
     * @throws InvalidTermException if parsing the next term finds a syntax error
     */
    @Override
    public Term next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        Term current = next;
        next = null;
        return current;
    }

    /**
     * @throws InvalidTermException if, while the parser checks for the
     * existence of the next term, a syntax error is encountered.
     */
    @Override
    public boolean hasNext() {
        // Parse ahead only when nothing is buffered, so repeated calls do not skip terms.
        if (next == null) {
            next = parser.nextTerm(true);
        }
        return next != null;
    }

    /** Not supported: parsed text cannot be modified through the iterator. */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}
}