// Parser.java example
//
// Explorer
// tuprolog-5-master
// - src
//   - alice
//     - tuprolog
// - test
/*
 * tuProlog - Copyright (C) 2001-2002  aliCE team at deis.unibo.it
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package alice.tuprolog;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.NoSuchElementException;
import java.util.regex.Pattern;

/**
 * This class defines a parser of prolog terms and sentences.
 * <p/>
 * BNF part 2: Parser
 * term ::= exprA(1200)
 * exprA(n) ::= exprB(n) { op(yfx,n) exprA(n-1) |
 *                         op(yf,n) }*
 * exprB(n) ::= exprC(n-1) { op(xfx,n) exprA(n-1) |
 *                           op(xfy,n) exprA(n) |
 *                           op(xf,n) }*
 * // exprC is called parseLeftSide in the code
 * exprC(n) ::= '-' integer | '-' float |
 *              op( fx,n ) exprA(n-1) |
 *              op( fy,n ) exprA(n) |
 *              exprA(0)
 * exprA(0) ::= integer |
 *              float |
 *              atom |
 *              variable |
 *              atom'(' exprA(1200) { ',' exprA(1200) }* ')' |
 *              '[' [ exprA(1200) { ',' exprA(1200) }* [ '|' exprA(1200) ] ] ']' |
 *              '(' exprA(1200) ')' |
 *              '{' [ exprA(1200) ] '}'
 * op(type,n) ::= atom | { symbol }+
 */
public class Parser implements Iterable<Term> {

    /**
     * Immutable pair made of a parsed term and the priority associated with
     * it by the parsing routine that produced it. The parsing routines use
     * priority 0 for terms that are not rooted in an operator.
     */
    private static class IdentifiedTerm {
        /** Priority of the root of {@link #result}. */
        private final int priority;
        /** The parsed term. */
        private final Term result;

        public IdentifiedTerm(int priority, Term result) {
            this.priority = priority;
            this.result = result;
        }
    }

    /** Operator manager used by every parser that is not given an explicit one. */
    private static final OperatorManager defaultOperatorManager = new DefaultOperatorManager();

    /** Source of tokens for this parser; assigned exactly once by the constructors. */
    private final Tokenizer tokenizer;
    /** Operator table consulted while parsing; replaced when a constructor receives a non-null manager. */
    private OperatorManager opManager = defaultOperatorManager;

    /**
     * Builds a parser that reads Prolog text from a stream and interprets
     * operators through the given manager.
     *
     * @param op the operator manager to use; when <tt>null</tt> the default
     * manager is kept
     * @param theoryText the stream providing the text to parse
     */
    public Parser(OperatorManager op, InputStream theoryText) {
        this(theoryText);
        if (op == null) {
            return;
        }
        opManager = op;
    }

    /**
     * Builds a parser that reads Prolog text from a string and interprets
     * operators through the given manager.
     *
     * @param op the operator manager to use; when <tt>null</tt> the default
     * manager is kept
     * @param theoryText the text to parse
     */
    public Parser(OperatorManager op, String theoryText) {
        this(theoryText);
        if (op == null) {
            return;
        }
        opManager = op;
    }

    /**
     * Builds a parser over a string, using the default operator manager.
     *
     * @param theoryText the text to parse
     */
    public Parser(String theoryText) {
        this.tokenizer = new Tokenizer(theoryText);
    }

    /**
     * Builds a parser over a stream, using the default operator manager.
     * The bytes are decoded by an {@link InputStreamReader} created without
     * an explicit charset.
     *
     * @param theoryText the stream providing the text to parse
     */
    public Parser(InputStream theoryText) {
        InputStreamReader decoder = new InputStreamReader(theoryText);
        this.tokenizer = new Tokenizer(new BufferedReader(decoder));
    }
    
//  user interface
    
    /**
     * Returns an iterator over the terms parsed from this parser's text.
     * The iterator parses its first term as soon as it is created, so this
     * call can already throw {@link InvalidTermException} on malformed input.
     */
    @Override
    public Iterator<Term> iterator() {
        final Iterator<Term> terms = new TermIterator(this);
        return terms;
    }

    /**
     * Parses the next term from the underlying text.
     *
     * @param endNeeded <tt>true</tt> if it is required to parse the end token
     * (a period) after the term, <tt>false</tt> otherwise.
     * @return the parsed term, or <tt>null</tt> if the first token read is
     * the end of the input.
     * @throws InvalidTermException if a syntax error is found, or if reading
     * the input fails (the I/O failure is then attached as the cause).
     */
    public Term nextTerm(boolean endNeeded) throws InvalidTermException {
        try {
            Token t = tokenizer.readToken();
            if (t.isEOF())
                return null;

            // the token was only read to detect the end of input: give it back
            tokenizer.unreadToken(t);
            Term term = expr(false);
            if (term == null)
                throw new InvalidTermException("The parser is unable to finish.");

            if (endNeeded && tokenizer.readToken().getType() != Tokenizer.END)
                throw new InvalidTermException("The term " + term + " is not ended with a period.");

            term.resolveTerm();
            return term;
        } catch (IOException ex) {
            // keep the original failure reachable through getCause()
            InvalidTermException failure = new InvalidTermException("An I/O error occured.");
            failure.initCause(ex);
            throw failure;
        }
    }

    /**
     * Static service turning the string representation of a single term into
     * a term, without supplying an operator manager.
     *
     * @param st the text of the term
     * @return the parsed term
     * @throws InvalidTermException if the text cannot be read as one term
     */
    public static Term parseSingleTerm(String st) throws InvalidTermException {
        final OperatorManager noExplicitManager = null;
        return parseSingleTerm(st, noExplicitManager);
    }

    /**
     * Static service to get a term from its string representation,
     * providing a specific operator manager.
     *
     * @param st the text of the term; it must contain exactly one term and
     * nothing after it
     * @param op the operator manager to use, or <tt>null</tt> for the default one
     * @return the parsed term
     * @throws InvalidTermException if the text is empty, is not a valid term,
     * has tokens left after the term, or if reading fails (the I/O failure is
     * then attached as the cause).
     */
    public static Term parseSingleTerm(String st, OperatorManager op) throws InvalidTermException {
        try {
            Parser p = new Parser(op, st);
            Token t = p.tokenizer.readToken();
            if (t.isEOF())
                throw new InvalidTermException("Term starts with EOF");

            // the token was only read to detect empty input: give it back
            p.tokenizer.unreadToken(t);
            Term term = p.expr(false);
            if (term == null)
                throw new InvalidTermException("Term is null");
            if (!p.tokenizer.readToken().isEOF())
                throw new InvalidTermException("The enitire string could not be read as one term");
            term.resolveTerm();
            return term;
        } catch (IOException ex) {
            // keep the original failure reachable through getCause()
            InvalidTermException failure = new InvalidTermException("An I/O error occured");
            failure.initCause(ex);
            throw failure;
        }
    }
    
    /**
     * @return the line number reported by the underlying tokenizer
     */
    public int getCurrentLine() {
        final int line = tokenizer.lineno();
        return line;
    }
    
    // internal parsing procedures

    /**
     * Parses a whole term, i.e. exprA at the highest operator priority,
     * and strips the priority wrapper from the result.
     */
    private Term expr(boolean commaIsEndMarker) throws InvalidTermException, IOException {
        IdentifiedTerm top = exprA(OperatorManager.OP_HIGH, commaIsEndMarker);
        return top.result;
    }

    /**
     * Parses <code>exprA(n) ::= exprB(n) { op(yfx,n) exprA(n-1) | op(yf,n) }*</code>.
     *
     * @param maxPriority highest operator priority accepted at this level
     * @param commaIsEndMarker forwarded to the operator check on tokens and to the nested parsing calls
     * @return the parsed term together with the priority of its root operator
     */
    private IdentifiedTerm exprA(int maxPriority, boolean commaIsEndMarker) throws InvalidTermException, IOException {
        IdentifiedTerm current = exprB(maxPriority, commaIsEndMarker);

        Token candidate = tokenizer.readToken();
        while (candidate.isOperator(commaIsEndMarker)) {
            int yfx = opManager.opPrio(candidate.seq, "yfx");
            int yf = opManager.opPrio(candidate.seq, "yf");

            // a reading is discarded when its priority is below the one of the
            // left operand or above the allowed maximum
            if (yf < current.priority || yf > maxPriority)
                yf = -1;
            if (yfx < current.priority || yfx > maxPriority)
                yfx = -1;

            IdentifiedTerm extended = null;

            // the infix (yfx) reading is tried first when it is at least as strong as the postfix one
            if (yfx >= yf && yfx >= OperatorManager.OP_LOW) {
                IdentifiedTerm right = exprA(yfx - 1, commaIsEndMarker);
                if (right != null)
                    extended = new IdentifiedTerm(yfx, new Struct(candidate.seq, current.result, right.result));
            }
            // otherwise the postfix (yf) reading is used, if it is admissible
            if (extended == null && yf >= OperatorManager.OP_LOW)
                extended = new IdentifiedTerm(yf, new Struct(candidate.seq, current.result));

            if (extended == null)
                break;
            current = extended;
            candidate = tokenizer.readToken();
        }
        // the token that stopped the loop is handed back to the tokenizer
        tokenizer.unreadToken(candidate);
        return current;
    }

    /**
     * Parses a left side followed by any number of xfx, xfy or xf operators.
     *
     * @param maxPriority highest operator priority accepted at this level
     * @param commaIsEndMarker forwarded to the operator check on tokens and to the nested parsing calls
     * @return the parsed term together with the priority of its root operator
     */
    private IdentifiedTerm exprB(int maxPriority, boolean commaIsEndMarker) throws InvalidTermException, IOException {
        // 1. prefix expression or expr0
        IdentifiedTerm left = parseLeftSide(commaIsEndMarker, maxPriority);

        // 2. the left side may be followed by xfx, xfy or xf operators
        Token operator = tokenizer.readToken();
        while (operator.isOperator(commaIsEndMarker)) {
            final int rawXfx = opManager.opPrio(operator.seq, "xfx");
            final int rawXfy = opManager.opPrio(operator.seq, "xfy");
            final int rawXf = opManager.opPrio(operator.seq, "xf");

            // readings outside [OP_LOW, maxPriority] are discarded
            final int xfx = (rawXfx > maxPriority || rawXfx < OperatorManager.OP_LOW) ? -1 : rawXfx;
            final int xfy = (rawXfy > maxPriority || rawXfy < OperatorManager.OP_LOW) ? -1 : rawXfy;
            final int xf = (rawXf > maxPriority || rawXf < OperatorManager.OP_LOW) ? -1 : rawXf;

            IdentifiedTerm combined = null;
            boolean xfxTried = false;

            // xfx is tried first when it is the strongest reading
            if (xfx >= xfy && xfx >= xf && xfx >= left.priority) {
                xfxTried = true;
                IdentifiedTerm right = exprA(xfx - 1, commaIsEndMarker);
                if (right != null) {
                    Struct node = new Struct(operator.seq, left.result, right.result);
                    combined = new IdentifiedTerm(xfx, node);
                }
            }
            // xfy is tried when it beats xf, or after xfx did not produce a term
            if (combined == null && xfy >= xf && xfy >= left.priority) {
                IdentifiedTerm right = exprA(xfy, commaIsEndMarker);
                if (right != null) {
                    Struct node = new Struct(operator.seq, left.result, right.result);
                    combined = new IdentifiedTerm(xfy, node);
                }
            }
            if (combined == null) {
                // a postfix reading ends this level immediately
                if (xf >= left.priority)
                    return new IdentifiedTerm(xf, new Struct(operator.seq, left.result));

                // xfx was not the strongest reading, but nothing else produced a term
                if (!xfxTried && xfx >= left.priority) {
                    IdentifiedTerm right = exprA(xfx - 1, commaIsEndMarker);
                    if (right != null) {
                        Struct node = new Struct(operator.seq, left.result, right.result);
                        combined = new IdentifiedTerm(xfx, node);
                    }
                }
            }

            if (combined == null)
                break;
            left = combined;
            operator = tokenizer.readToken();
        }
        // the token that stopped the loop is handed back to the tokenizer
        tokenizer.unreadToken(operator);
        return left;
    }

    /**
     * Parses and returns a valid 'left side' of an expression: a negative
     * number literal, a prefix operator applied to an expression, or
     * otherwise an expr0.
     *
     * @param commaIsEndMarker used when the left side is part of an argument list of expressions
     * @param maxPriority prefix operators with a higher priority than this are not applied
     * @return a wrapper of: 1. the term correctly structured and 2. the priority of its root operator
     *         (0 for number literals and expr0)
     * @throws InvalidTermException if a nested parsing call reports a syntax error
     */
    private IdentifiedTerm parseLeftSide(boolean commaIsEndMarker, int maxPriority) throws InvalidTermException, IOException {
        Token first = tokenizer.readToken();

        // 1. prefix expression
        if (first.isOperator(commaIsEndMarker)) {
            final int rawFx = opManager.opPrio(first.seq, "fx");
            final int rawFy = opManager.opPrio(first.seq, "fy");

            // a minus sign directly followed by a number is a negative literal
            if (first.seq.equals("-")) {
                Token following = tokenizer.readToken();
                if (following.isNumber())
                    return new IdentifiedTerm(0, Parser.createNumber("-" + following.seq));
                tokenizer.unreadToken(following);
            }

            // readings with a priority higher than permitted are discarded
            final int fy = (rawFy > maxPriority) ? -1 : rawFy;
            final int fx = (rawFx > maxPriority) ? -1 : rawFx;
            final boolean fxAllowed = fx >= OperatorManager.OP_LOW;
            final boolean fxFirst = fxAllowed && fx >= fy;

            // fx is tried first when it is at least as strong as fy
            if (fxFirst) {
                IdentifiedTerm operand = exprA(fx - 1, commaIsEndMarker);    //op(fx, n) exprA(n - 1)
                if (operand != null)
                    return new IdentifiedTerm(fx, new Struct(first.seq, operand.result));
            }
            // fy is stronger than fx, or fx did not produce a term
            if (fy >= OperatorManager.OP_LOW) {
                IdentifiedTerm operand = exprA(fy, commaIsEndMarker);        //op(fy, n) exprA(n)
                if (operand != null)
                    return new IdentifiedTerm(fy, new Struct(first.seq, operand.result));
            }
            // fy was stronger but did not produce a term: fall back to fx
            if (!fxFirst && fxAllowed) {
                IdentifiedTerm operand = exprA(fx - 1, commaIsEndMarker);    //op(fx, n) exprA(n - 1)
                if (operand != null)
                    return new IdentifiedTerm(fx, new Struct(first.seq, operand.result));
            }
        }

        // 2. expr0: the token is handed back so that expr0 reads it again
        tokenizer.unreadToken(first);
        return new IdentifiedTerm(0, expr0());
    }

    /**
     * Parses a term that is not rooted in an operator:
     * <pre>
     * exprA(0) ::= integer |
     *              float |
     *              variable |
     *              atom |
     *              atom( exprA(1200) { , exprA(1200) }* ) |
     *              '[' [ exprA(1200) { , exprA(1200) }* [ | exprA(1200) ] ] ']' |
     *              '{' [ exprA(1200) ] '}' |
     *              '(' exprA(1200) ')'
     * </pre>
     *
     * @throws InvalidTermException if the next token starts none of the
     * forms above, or if a closing parenthesis, bracket or brace is missing
     */
    private Term expr0() throws InvalidTermException, IOException {
        Token first = tokenizer.readToken();

        // numbers and variables
        if (first.isType(Tokenizer.INTEGER))
            return Parser.parseInteger(first.seq); // TODO moved method to Number
        if (first.isType(Tokenizer.FLOAT))
            return Parser.parseFloat(first.seq);   // TODO moved method to Number
        if (first.isType(Tokenizer.VARIABLE))
            return new Var(first.seq);             // TODO switched to use the internal check for "_" in Var(String)

        // atoms and compound terms
        boolean atomLike = first.isType(Tokenizer.ATOM)
                || first.isType(Tokenizer.SQ_SEQUENCE)
                || first.isType(Tokenizer.DQ_SEQUENCE);
        if (atomLike) {
            if (first.isFunctor()) {
                String functor = first.seq;
                Token open = tokenizer.readToken();   //reading left par
                if (!open.isType(Tokenizer.LPAR))
                    throw new InvalidTermException("bug in parsing process. Something identified as functor misses its first left parenthesis");//todo check can be skipped
                LinkedList<Term> args = expr0_arglist();     //reading arguments
                Token close = tokenizer.readToken();  //reading right par
                if (!close.isType(Tokenizer.RPAR))
                    throw new InvalidTermException("Missing right parenthesis: (" + args + " -> here <-");
                return new Struct(functor, args);
            }
            return new Struct(first.seq);
        }

        // parenthesized expression
        if (first.isType(Tokenizer.LPAR)) {
            Term inner = expr(false);
            if (!tokenizer.readToken().isType(Tokenizer.RPAR))
                throw new InvalidTermException("Missing right parenthesis: (" + inner + " -> here <-");
            return inner;
        }

        // list
        if (first.isType(Tokenizer.LBRA)) {
            Token peeked = tokenizer.readToken();
            if (peeked.isType(Tokenizer.RBRA))
                return new Struct();

            tokenizer.unreadToken(peeked);
            Term list = expr0_list();
            if (!tokenizer.readToken().isType(Tokenizer.RBRA))
                throw new InvalidTermException("Missing right bracket: [" + list + " -> here <-");
            return list;
        }

        // braces
        if (first.isType(Tokenizer.LBRA2)) {
            Token peeked = tokenizer.readToken();
            if (peeked.isType(Tokenizer.RBRA2))
                return new Struct("{}");

            tokenizer.unreadToken(peeked);
            Term content = expr(false);
            Token close = tokenizer.readToken();
            if (!close.isType(Tokenizer.RBRA2))
                throw new InvalidTermException("Missing right braces: {" + content + " -> here <-");
            return new Struct("{}", content);
        }

        throw new InvalidTermException("The following token could not be identified: " + first.seq);
    }

    /**
     * Parses the elements of a list after the opening bracket, up to but not
     * including the closing bracket, which is handed back to the tokenizer.
     * Recursive: one call per list element.
     */
    // TODO make non-recursive?
    private Term expr0_list() throws InvalidTermException, IOException {
        Term element = expr(true);
        Token separator = tokenizer.readToken();
        String symbol = separator.seq;

        if (",".equals(symbol)) {
            // more elements follow: they form the tail of this cell
            return new Struct(element, expr0_list());
        }
        if ("|".equals(symbol)) {
            // explicit tail
            return new Struct(element, expr(true));
        }
        if (!"]".equals(symbol)) {
            throw new InvalidTermException("The expression: " + element + "\nis not followed by either a ',' or '|'  or ']'.");
        }
        // last element: the caller consumes the closing bracket
        tokenizer.unreadToken(separator);
        return new Struct(element, new Struct());
    }

    /**
     * Parses the comma-separated arguments of a compound term, after the
     * opening parenthesis, up to but not including the closing parenthesis,
     * which is handed back to the tokenizer.
     * <p>
     * Implemented as a loop so that the stack depth does not grow with the
     * number of arguments.
     *
     * @return the arguments, in source order (at least one)
     * @throws InvalidTermException if an argument is followed by neither a
     * comma nor a closing parenthesis
     */
    private LinkedList<Term> expr0_arglist() throws InvalidTermException, IOException {
        LinkedList<Term> arguments = new LinkedList<Term>();
        while (true) {
            Term argument = expr(true);
            arguments.add(argument);

            Token t = tokenizer.readToken();
            if (",".equals(t.seq))
                continue;   // another argument follows
            if (")".equals(t.seq)) {
                // the caller consumes the closing parenthesis
                tokenizer.unreadToken(t);
                return arguments;
            }
            throw new InvalidTermException("The argument: " + argument + "\nis not followed by either a ',' or ')'.\nline: " + tokenizer.lineno());
        }
    }
    
    // commodity methods to parse numbers

    /**
     * Converts the decimal representation of an integer into a Prolog number.
     * Values in the inclusive range
     * [{@link Integer#MIN_VALUE}, {@link Integer#MAX_VALUE}] become an
     * {@link Int}; any other value that fits in a <code>long</code> becomes a
     * {@link Long}.
     *
     * @param s the text of the integer
     * @return the corresponding number
     * @throws NumberFormatException if <code>s</code> is not a valid <code>long</code>
     */
    static Number parseInteger(String s) {
        long num = java.lang.Long.parseLong(s);
        // inclusive bounds: both extreme int values are representable as Int
        if (num >= Integer.MIN_VALUE && num <= Integer.MAX_VALUE)
            return new Int((int) num);
        return new Long(num);
    }

    /**
     * Converts the textual representation of a floating point number into a
     * Prolog {@link Double}.
     *
     * @throws NumberFormatException if <code>s</code> is not a parsable double
     */
    static Double parseFloat(String s) {
        return new Double(java.lang.Double.parseDouble(s));
    }

    /**
     * Builds a number from its text: the integer reading is tried first and
     * the floating point reading is used when the integer one fails.
     */
    static Number createNumber(String s){
        Number parsed;
        try {
            parsed = parseInteger(s);
        } catch (Exception notAnInteger) {
            parsed = parseFloat(s);
        }
        return parsed;
    }
    
    /**
     * Tells whether a string has the shape checked by this class for atoms:
     * either the single character <tt>!</tt>, or a lowercase ASCII letter
     * followed by any number of ASCII letters, digits and underscores.
     *
     * @param s the string to check; must not be <tt>null</tt>
     * @return true if the String could be a prolog atom
     */
    public static boolean isAtom(String s) {
        return atom.matcher(s).matches();
    }

    /** Shape accepted by {@link #isAtom(String)}; compiled once and never reassigned. */
    private static final Pattern atom = Pattern.compile("(!|[a-z][a-zA-Z_0-9]*)");
    
    //
    
    /**
     * This class represents an iterator of terms from Prolog text embedded
     * in a parser. Note that this class resembles more a generator than an
     * iterator type. In fact, both {@link TermIterator#next()} and
     * {@link TermIterator#hasNext()} throw {@link InvalidTermException} if
     * the next term they are trying to return or check for contains a syntax
     * error; this is due to both methods trying to generate the next term
     * instead of just returning it or checking for its existence from a pool
     * of already produced terms. The constructor parses the first term, so it
     * can throw the same exception.
     * <p>
     * Invariant: <code>hasNext</code> is true exactly when <code>next</code>
     * holds a term that has been parsed but not yet returned.
     */
    private static class TermIterator implements Iterator<Term> {
        private final Parser parser;
        private boolean hasNext;
        private Term next;

        TermIterator(Parser p) {
            parser = p;
            next = parser.nextTerm(true);
            hasNext = (next != null);
        }

        /**
         * @throws NoSuchElementException if the parser has no further term
         * @throws InvalidTermException if the next term contains a syntax error
         */
        @Override
        public Term next() {
            // hasNext() parses the next term when none is buffered
            if (!hasNext())
                throw new NoSuchElementException();
            Term current = next;
            next = null;
            hasNext = false;
            return current;
        }

        /**
         * @throws InvalidTermException if, while the parser checks for the
         * existence of the next term, a syntax error is encountered.
         */
        @Override
        public boolean hasNext() {
            if (!hasNext) {
                next = parser.nextTerm(true);
                hasNext = (next != null);
            }
            return hasNext;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

}