package de.skuzzle.polly.core.parser;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import de.skuzzle.polly.core.parser.PrecedenceTable.PrecedenceLevel;
import de.skuzzle.polly.core.parser.ast.Identifier;
import de.skuzzle.polly.core.parser.ast.ResolvableIdentifier;
import de.skuzzle.polly.core.parser.ast.Root;
import de.skuzzle.polly.core.parser.ast.declarations.Declaration;
import de.skuzzle.polly.core.parser.ast.declarations.Namespace;
import de.skuzzle.polly.core.parser.ast.declarations.types.MissingType;
import de.skuzzle.polly.core.parser.ast.declarations.types.ProductType;
import de.skuzzle.polly.core.parser.ast.declarations.types.Type;
import de.skuzzle.polly.core.parser.ast.directives.DelayDirective;
import de.skuzzle.polly.core.parser.ast.directives.Directive;
import de.skuzzle.polly.core.parser.ast.directives.ProblemDirective;
import de.skuzzle.polly.core.parser.ast.directives.ReinterpretDirctive;
import de.skuzzle.polly.core.parser.ast.expressions.Assignment;
import de.skuzzle.polly.core.parser.ast.expressions.Braced;
import de.skuzzle.polly.core.parser.ast.expressions.Call;
import de.skuzzle.polly.core.parser.ast.expressions.Delete;
import de.skuzzle.polly.core.parser.ast.expressions.Delete.DeleteableIdentifier;
import de.skuzzle.polly.core.parser.ast.expressions.Empty;
import de.skuzzle.polly.core.parser.ast.expressions.Expression;
import de.skuzzle.polly.core.parser.ast.expressions.Inspect;
import de.skuzzle.polly.core.parser.ast.expressions.NamespaceAccess;
import de.skuzzle.polly.core.parser.ast.expressions.OperatorCall;
import de.skuzzle.polly.core.parser.ast.expressions.Problem;
import de.skuzzle.polly.core.parser.ast.expressions.VarAccess;
import de.skuzzle.polly.core.parser.ast.expressions.literals.BooleanLiteral;
import de.skuzzle.polly.core.parser.ast.expressions.literals.ChannelLiteral;
import de.skuzzle.polly.core.parser.ast.expressions.literals.DateLiteral;
import de.skuzzle.polly.core.parser.ast.expressions.literals.FunctionLiteral;
import de.skuzzle.polly.core.parser.ast.expressions.literals.HelpLiteral;
import de.skuzzle.polly.core.parser.ast.expressions.literals.ListLiteral;
import de.skuzzle.polly.core.parser.ast.expressions.literals.NumberLiteral;
import de.skuzzle.polly.core.parser.ast.expressions.literals.ProductLiteral;
import de.skuzzle.polly.core.parser.ast.expressions.literals.StringLiteral;
import de.skuzzle.polly.core.parser.ast.expressions.literals.TimespanLiteral;
import de.skuzzle.polly.core.parser.ast.expressions.literals.UserLiteral;
import de.skuzzle.polly.core.parser.ast.lang.Operator.OpType;
import de.skuzzle.polly.core.parser.problems.ProblemReporter;
import de.skuzzle.polly.core.parser.problems.Problems;
import de.skuzzle.polly.tools.collections.LinkedStack;
import de.skuzzle.polly.tools.collections.Stack;
/**
* <p>This class provides recursive descent parsing for polly expressions and can output
* an abstract syntax tree for the parsed expression. The root of the AST is represented
* by the class {@link Root}, all AST nodes are subclasses of {@link Node}.
* Every AST node that is created by this parser gets assigned its actual
* {@link Position} within the input string. This makes it possible to provide detailed
* error messages during parsing, type-checking or execution of the AST.</p>
*
* <p>This parser uses the following context-free syntax, given in EBNF. There may exist
* some tweaks in the implementation that are not expressed in the following grammar.</p>
*
* <pre>
* root -> ':' ID (assign (WS assign)*)? // AST root with a WS separated list of expressions
*
* directives -> directive (',' directive)*
* directive -> DELAY secTerm
* | REINTERPRET
*
* assign -> relation '->' PUBLIC? TEMP? ID // assignment of relation to identifier X
* relation -> conjunction (REL_OP conjunction)* // relation (<,>,<=,>=,==, !=)
* conjunction -> disjunction (CONJ_OP disjunction)* // conjunction (||)
* disjunction -> secTerm (DISJ_OP secTerm)* // disjunction (&&)
* secTerm -> term (SECTERM_OP term)* // plus minus
* term -> factor (TERM_OP factor)* // multiplication and co
* factor -> postfix (FACTOR_OP factor)? // right-associative (power operator)
* postfix -> autolist (POSTFIX_OP autolist)* // postfix operator
* autolist -> dotdot (';' dotdot)* // implicit list literal
* dotdot -> unary ('..' unary ('$' unary)?)? // range operator with optional step size
* unary -> UNARY_OP unary // right-associative unary operator
* | call
* call -> access ( '(' parameters ')' )?
* access -> literal ('.' literal )? // namespace access. left operand must be a single identifier (represented by a VarAccess)
* literal -> ID // VarAccess
* | '(' relation ')' // braced expression
* | '\(' parameters ':' relation ')' // lambda function literal
* | '{' exprList '}' // concrete list of expressions
* | DELETE PUBLIC? ID (',' PUBLIC? ID)* // delete operator
* | INSPECT PUBLIC ID // inspect for public
* | INSPECT ID ('.' ID)? // inspect operator
* | IF relation ':' relation ':' relation // conditional operator
* | TRUE | FALSE // boolean literal
* | CHANNEL // channel literal
* | USER // user literal
* | STRING // string literal
* | NUMBER // number literal
* | DATETIME // date literal
* | TIMESPAN // timespan literal
* | '?' // HELP literal
* | RADIX literal // radix operator
*
* exprList -> (relation (',' relation)*)?
* parameters -> (parameter (',' parameter)*)?
* parameter -> type? ID
* type -> ID // primitive type
* | LIST '<' type '>' // list type
* | '(' (type (WS type)*)? '->' type ')' // function type
* | '?'
*
* WS -> ' ' | \t
* TEMP -> 'temp'
* PUBLIC -> 'public'
* IF -> 'if'
* TRUE -> 'true'
* FALSE -> 'false'
* CHANNEL -> '#' ID
* USER -> '@' ID
* STRING -> '"' .* '"'
* NUMBER -> [0-9]*(\.[0-9]+([eE][0-9]+)?)?
* TIMESPAN -> ([0-9]+[ywdhms])+
* DATE -> [0-9]{1,2}\.[0-9]{1,2}\.[0-9]{4}
* TIME -> [0-9]{1,2}:[0-9]{1,2}
* DATETIME -> TIME | DATE | DATE@TIME
* ID -> [_a-zA-Z][_a-zA-Z0-9]+
* | '\' . // any escaped token
* </pre>
*
* <p>This parser has simple support for reporting multiple problems during parsing. For
* incomplete expressions, {@link Problem} nodes are inserted into the resulting AST.
* Temporary types are created for missing types, and generated identifiers are used
* in place of missing identifiers. Occurring problems are reported to the outside
* using a {@link ProblemReporter} instance.</p>
*
* @author Simon Taddiken
*/
public class InputParser {
/** Operator precedence table; asked whether a token belongs to a precedence level. */
protected final PrecedenceTable operators;
/**
 * Stack which contains the closing token types of the currently parsed sub
 * expressions. The scanner is told to skip whitespaces while this stack is non-empty.
 */
private final Stack<TokenType> expressions;
/** Scanner that reads tokens from the input. */
protected InputScanner scanner;
/**
 * Cache for types that could not be resolved as known types, keyed by the name of
 * the identifier that referenced them.
 */
private final Map<String, Type> typeCache = new HashMap<String, Type>();
/** Counter used to generate distinct names for missing identifiers. */
private int missingId;
/** Used to report problems during parsing. */
private final ProblemReporter reporter;
/**
 * Creates a new parser that reads its tokens from an already existing scanner.
 *
 * @param scanner The {@link InputScanner} which provides the token stream.
 * @param reporter The {@link ProblemReporter} that receives the problems found by
 *          this parser.
 */
public InputParser(InputScanner scanner, ProblemReporter reporter) {
    this.reporter = reporter;
    this.scanner = scanner;
    // per-parser state: a fresh precedence table and an empty stack of open
    // sub expressions
    this.operators = new PrecedenceTable();
    this.expressions = new LinkedStack<TokenType>();
}
/**
 * Creates a new parser for the given input string. The string is wrapped into a new
 * {@link InputScanner} without naming a charset, so whatever encoding that scanner
 * constructor defaults to is used.
 *
 * @param input The string to parse.
 * @param reporter The {@link ProblemReporter} that receives the problems found by
 *          this parser.
 */
public InputParser(String input, ProblemReporter reporter) {
    // same initialisation as the scanner based constructor
    this(new InputScanner(input), reporter);
}
/**
* Creates a new parser which will parse the given input string using the provided
* encoding.
*
* @param input The string to parse.
* @param encoding The charset name to use.
* @param reporter The ProblemReporter for this parser.
* @throws UnsupportedEncodingException If the charset name was invalid.
*/
public InputParser(String input, String encoding, ProblemReporter reporter)
throws UnsupportedEncodingException {
this.scanner = new InputScanner(input, Charset.forName(encoding));
this.operators = new PrecedenceTable();
this.expressions = new LinkedStack<TokenType>();
this.reporter = reporter;
}
/**
 * Non-throwing variant of {@link #parse()}. A {@link ParseException} raised while
 * parsing is not propagated; its stack trace is printed and <code>null</code> is
 * returned instead.
 *
 * @return Whatever {@link #parse()} returned, or <code>null</code> if it threw a
 *          {@link ParseException}.
 */
public Root tryParse() {
    Root parsed = null;
    try {
        parsed = this.parse();
    } catch (ParseException e) {
        // keep the null result; the trace is the only diagnostic that is emitted
        e.printStackTrace();
    }
    return parsed;
}
/**
 * Parses the complete input and returns the root of the resulting AST. This simply
 * delegates to {@link #parseRoot()}.
 *
 * @return The parsed AST root as returned by {@link #parseRoot()}.
 * @throws ParseException If the string was not well formatted.
 */
public Root parse() throws ParseException {
    final Root root = this.parseRoot();
    return root;
}
/**
 * Parses one polly expression that stands on its own: no command prefix and no
 * assignment. Parsing starts at the relation level and the expression must be
 * followed by the end of the stream.
 *
 * @return The parsed AST expression.
 * @throws ParseException If the string was not well formatted.
 */
public Expression parseSingleExpression() throws ParseException {
    final Expression expression = this.parseRelation();
    // anything left over after the expression is reported as a problem
    this.expect(TokenType.EOS, false);
    return expression;
}
/**
 * Creates a placeholder {@link Identifier} whose name is generated from a running
 * counter, so that every call yields a different name.
 *
 * @param position Position of the generated identifier.
 * @return A new identifier.
 */
private Identifier missingIdentifier(Position position) {
    final int serial = this.missingId++;
    return new Identifier(position, "$missing_" + serial); //$NON-NLS-1$
}
/**
 * Resolves a type by name. Types known to {@link Type#resolve(Identifier)} are
 * returned directly. Otherwise the type cache of this parser is consulted; on a
 * cache miss a replacement type is created:
 * <ul>
 * <li>a type variable named like the identifier, if
 *     {@link ParserProperties#ALLOW_POLYMORPHIC_DECLS} is enabled,</li>
 * <li>a {@link MissingType} otherwise, in which case an unknown-type problem is
 *     reported as well.</li>
 * </ul>
 * The type found or created that way is stored in the cache under the identifier's
 * name, so later requests for the same name yield the same instance.
 *
 * @param name Type name to resolve.
 * @return The resolved type.
 * @throws ParseException Declared for the problem reporting call.
 */
private Type lookupType(Identifier name) throws ParseException {
    final Type known = Type.resolve(name);
    if (known != null) {
        return known;
    }

    Type substitute = this.typeCache.get(name.getId());
    if (substitute == null) {
        if (ParserProperties.should(ParserProperties.ALLOW_POLYMORPHIC_DECLS)) {
            substitute = Type.newTypeVar(name);
        } else {
            substitute = new MissingType(name);
            this.reporter.semanticProblem(Problems.UNKNOWN_TYPE, name.getPosition(),
                    name);
        }
    }
    this.typeCache.put(name.getId(), substitute);
    return substitute;
}
/**
 * Reports a syntax problem for a token that was hit in place of the expected one.
 * The reported position is the span the scanner computes from the actual token.
 *
 * @param expected The token that was expected.
 * @param actual The token that actually occurred.
 * @throws ParseException Declared for the problem reporting call.
 */
protected void reportExpected(TokenType expected, Token actual)
        throws ParseException {
    final Position span = this.scanner.spanFrom(actual);
    this.reporter.syntaxProblem(expected, actual, span);
}
/**
 * Expects the next token to be of type <code>expected</code> and consumes it if it
 * is. A token representing a lexical error, or any other token of a different type,
 * causes a problem to be reported.
 *
 * <p>What happens to an offending token depends on <code>insert</code>:</p>
 * <ul>
 * <li><code>insert == true</code>: the expected token is treated as if it had been
 *     present. An unexpected token stays in the stream; a lexical error token is
 *     dropped.</li>
 * <li><code>insert == false</code>: the offending token is consumed, which behaves
 *     like replacing it with the expected token.</li>
 * </ul>
 *
 * @param expected Expected token type.
 * @param insert Whether this method should pretend that the expected token occurred
 *          if it does not.
 * @throws ParseException If the ProblemReporter does not support multiple problems.
 */
protected void expect(TokenType expected, boolean insert) throws ParseException {
    final Token next = this.scanner.lookAhead();
    final boolean lexicalError = next.matches(TokenType.ERROR);
    final boolean asExpected = next.matches(expected);

    if (lexicalError) {
        // the token is consumed before the problem is reported and only put back
        // if the tail of this method is going to consume it
        this.scanner.consume();
        this.reporter.lexicalProblem(next.getStringValue(), next.getPosition());
        if (!insert) {
            this.scanner.pushBackFirst(next);
        }
    } else if (!asExpected) {
        // consume first, then report, then restore the token
        this.scanner.consume();
        this.reportExpected(expected, next);
        this.scanner.pushBackFirst(next);
    }

    if (insert && !asExpected) {
        // pretend the expected token was there, nothing more to consume
        return;
    }
    this.scanner.consume();
}
/**
 * Expects the next token to be an identifier and turns it into an
 * {@link Identifier}.
 * <ul>
 * <li>A lexical error token is reported, consumed and still converted into an
 *     identifier carrying the token's string value.</li>
 * <li>An escaped token yields an escaped identifier, provided
 *     {@link ParserProperties#ENABLE_TOKEN_ESCAPING} is enabled.</li>
 * <li>Any other non-identifier token is reported and left in the stream; an
 *     identifier with a generated name is returned in its place.</li>
 * </ul>
 *
 * @return An {@link Identifier} created from the next token, or a generated one if
 *          the next token is no identifier.
 * @throws ParseException Declared for the scanner and problem reporting calls.
 */
protected Identifier expectIdentifier() throws ParseException {
    final Token next = this.scanner.lookAhead();

    if (next.matches(TokenType.ERROR)) {
        // report the lexical error, then treat the token like an identifier below
        this.reporter.lexicalProblem(next.getStringValue(), next.getPosition());
    } else {
        if (ParserProperties.should(ParserProperties.ENABLE_TOKEN_ESCAPING)
                && next.matches(TokenType.ESCAPED)) {
            // escaped identifier: named after the token that was escaped
            this.scanner.consume();
            final EscapedToken escapedToken = (EscapedToken) next;
            return new Identifier(escapedToken.getPosition(),
                    escapedToken.getEscaped().getStringValue(), true);
        }
        if (!next.matches(TokenType.IDENTIFIER)) {
            // missing identifier: consume for reporting, then restore the token
            this.scanner.consume();
            this.reportExpected(TokenType.IDENTIFIER, next);
            this.scanner.pushBackFirst(next);
            return this.missingIdentifier(next.getPosition());
        }
    }

    this.scanner.consume();
    return new Identifier(next.getPosition(), next.getStringValue());
}
/**
 * Optionally consumes one separator token. Nothing is matched if the scanner
 * reports <code>true</code> from <code>skipWhiteSpaces()</code> (presumably because
 * it already drops whitespaces itself in that mode; confirm against
 * {@link InputScanner}).
 *
 * @throws ParseException If parsing fails.
 */
protected void allowSingleWhiteSpace() throws ParseException {
    if (this.scanner.skipWhiteSpaces()) {
        return;
    }
    // the result is irrelevant: a missing separator is fine as well
    this.scanner.match(TokenType.SEPERATOR);
}
/**
 * Marks the start of a new sub expression by remembering its closing token type and
 * tells the scanner to skip whitespaces from now on.
 *
 * @param end The token type that could close this sub expression.
 */
protected void enterExpression(TokenType end) {
    final TokenType closing = end;
    this.expressions.push(closing);
    this.scanner.setSkipWhiteSpaces(true);
}
/**
 * Tells whether at least one sub expression has been entered and not yet left.
 * Entering a sub expression switches the scanner to skipping whitespaces.
 *
 * @return <code>true</code> while the stack of open sub expressions is non-empty.
 */
protected boolean inExpression() {
    final boolean noneOpen = this.expressions.isEmpty();
    return !noneOpen;
}
/**
 * Marks the end of the most recently entered sub expression. Once no sub expression
 * is open anymore, the scanner is told to stop skipping whitespaces.
 */
protected void leaveExpression() {
    this.expressions.pop();
    if (!this.expressions.isEmpty()) {
        // still inside an outer sub expression, keep skipping whitespaces
        return;
    }
    this.scanner.setSkipWhiteSpaces(false);
}
/**
 * Parses a complete command: a colon, the command name, a separator delimited list
 * of assignments and an optional list of directives that is introduced by a comma.
 * The end of the stream is expected afterwards.
 *
 * <p>The input is silently rejected with a <code>null</code> result if it does not
 * start with a colon followed by an identifier, if scanning that prefix fails, or if
 * the command name is shorter than
 * {@link ParserProperties#COMMAND_MIN_LENGTH}.</p>
 *
 * @return The root of the AST or <code>null</code> if the input is no command.
 * @throws ParseException If parsing fails after the command name was recognized.
 */
protected Root parseRoot() throws ParseException {
    final Position start;
    final Token cmdToken;
    try {
        start = this.scanner.lookAhead().getPosition();
        if (!this.scanner.match(TokenType.COLON)) {
            return null;
        }
        cmdToken = this.scanner.lookAhead();
        if (!this.scanner.match(TokenType.IDENTIFIER)) {
            return null;
        }
    } catch (ParseException ignore) {
        // a failure this early means the input is no command at all
        return null;
    }

    // the command identifier spans from the colon to the end of the name
    final Position cmdPosition = new Position(start.getStart(),
            cmdToken.getPosition().getEnd());
    final Identifier cmd = new Identifier(cmdPosition, cmdToken.getStringValue());

    // min length hack to ignore smilies
    if (cmd.getId().length() < ParserProperties.getInt(
            ParserProperties.COMMAND_MIN_LENGTH)) {
        return null;
    }

    // every separator introduces one more signature expression
    final List<Expression> signature = new ArrayList<Expression>();
    while (this.scanner.match(TokenType.SEPERATOR)) {
        signature.add(this.parseAssignment());
    }

    final Map<TokenType, Directive> directives = new HashMap<>();
    if (this.scanner.match(TokenType.COMMA)) {
        this.parseDirectives(directives);
    }

    this.expect(TokenType.EOS, false);
    return new Root(this.scanner.spanFrom(start), cmd, signature,
            this.reporter.hasProblems(), directives);
}
/**
 * Parses a comma separated list of directives. At least one directive is parsed.
 * Each directive is stored under its directive type; if a directive of that type is
 * already present, a problem is reported and the earlier directive is kept.
 * <pre>
 * directives -> directive (',' directive)*
 * </pre>
 * @param directives Map into which the parsed directives are inserted.
 * @throws ParseException If parsing fails.
 */
protected void parseDirectives(Map<TokenType, Directive> directives) throws ParseException {
    do {
        final Directive parsed = this.parseDirective();
        final boolean duplicate = directives.containsKey(parsed.getDirectiveType());
        if (!duplicate) {
            directives.put(parsed.getDirectiveType(), parsed);
        } else {
            // a directive of this type has been given before
            this.reporter.semanticProblem(Problems.DUPLICATED_DIRECTIVE,
                    parsed.getPosition(), parsed.getDirectiveType());
        }
    } while (this.scanner.match(TokenType.COMMA));
}
/**
 * Parses a single directive, selected by the type of the next token. Any token that
 * starts no known directive is reported and represented by a
 * {@link ProblemDirective}.
 * <pre>
 * directive -> DELAY ' ' secTerm
 *            | REINTERPRET
 * </pre>
 * @return The parsed directive.
 * @throws ParseException If parsing fails.
 */
protected Directive parseDirective() throws ParseException {
    final Token first = this.scanner.lookAhead();
    switch (first.getType()) {
    case DELAY: {
        this.scanner.consume();
        // the separator between keyword and delay is assumed if it is missing
        this.expect(TokenType.SEPERATOR, true);
        final Expression delay = this.parseSecTerm();
        return new DelayDirective(this.scanner.spanFrom(first), delay);
    }
    case REINTERPRET: {
        this.scanner.consume();
        return new ReinterpretDirctive(this.scanner.spanFrom(first));
    }
    default: {
        // the offending token is left in the stream
        this.expect(TokenType.DIRECTIVE, true);
        return new ProblemDirective(this.scanner.spanFrom(first));
    }
    }
}
/**
 * Parses an assignment, the entry point for all signature expressions. If the
 * parsed relation is not followed by an assignment operator, the relation itself is
 * the result. A single whitespace is tolerated after the operator and after each of
 * the optional modifiers.
 * <pre>
 * assign -> relation '->' PUBLIC? TEMP? ID   // assignment of relation to identifier
 * </pre>
 * @return The parsed {@link Assignment} or the plain relation if no assignment
 *          operator was found.
 * @throws ParseException If parsing fails.
 */
protected Expression parseAssignment() throws ParseException {
    final Expression value = this.parseRelation();
    if (!this.scanner.match(TokenType.ASSIGNMENT)) {
        return value;
    }

    this.allowSingleWhiteSpace();
    final boolean isPublic = this.scanner.match(TokenType.PUBLIC);
    this.allowSingleWhiteSpace();
    final boolean isTemp = this.scanner.match(TokenType.TEMP);
    this.allowSingleWhiteSpace();
    final Identifier target = this.expectIdentifier();

    final Position span = new Position(value.getPosition(), target.getPosition());
    return new Assignment(span, value, target, isPublic, isTemp);
}
/**
 * Parses the operators of the RELATION precedence level. They are combined
 * left-associatively.
 * <pre>
 * relation -> conjunction (REL_OP conjunction)*    // relation (<,>,<=,>=,==)
 * </pre>
 * @return The parsed operator call or the result from the next higher precedence
 *          level if no REL_OP was found.
 * @throws ParseException If parsing fails.
 */
protected Expression parseRelation() throws ParseException {
    Expression result = this.parseConjunction();
    for (Token op = this.scanner.lookAhead();
            this.operators.match(op, PrecedenceLevel.RELATION);
            op = this.scanner.lookAhead()) {

        this.scanner.consume();
        // ISSUE #12: a '>' directly followed by another '>' belongs to a right
        // shift, so stop here with what has been parsed so far.
        // NOTE(review): the first '>' has already been consumed at this point;
        // confirm that this is what the shift handling relies on.
        final boolean shift = op.matches(TokenType.GT)
                && this.scanner.lookAhead().matches(TokenType.GT);
        if (shift) {
            return result;
        }

        final Expression rhs = this.parseConjunction();
        final Position span = new Position(result.getPosition(), rhs.getPosition());
        result = OperatorCall.binary(span, OpType.fromToken(op), result, rhs);
    }
    return result;
}
/**
 * Parses the operators of the CONJUNCTION precedence level. They are combined
 * left-associatively.
 * <pre>
 * conjunction -> disjunction (CONJ_OP disjunction)*    // conjunction (||)
 * </pre>
 * @return The parsed operator call or the result from the next higher precedence
 *          level if no CONJ_OP was found.
 * @throws ParseException If parsing fails.
 */
protected Expression parseConjunction() throws ParseException {
    Expression result = this.parseDisjunction();
    for (Token op = this.scanner.lookAhead();
            this.operators.match(op, PrecedenceLevel.CONJUNCTION);
            op = this.scanner.lookAhead()) {

        this.scanner.consume();
        final Expression rhs = this.parseDisjunction();
        final Position span = new Position(result.getPosition(), rhs.getPosition());
        result = OperatorCall.binary(span, OpType.fromToken(op), result, rhs);
    }
    return result;
}
/**
 * Parses the operators of the DISJUNCTION precedence level. They are combined
 * left-associatively.
 * <pre>
 * disjunction -> secTerm (DISJ_OP secTerm)*    // disjunction (&&)
 * </pre>
 * @return The parsed operator call or the result from the next higher precedence
 *          level if no DISJ_OP was found.
 * @throws ParseException If parsing fails.
 */
protected Expression parseDisjunction() throws ParseException {
    Expression result = this.parseSecTerm();
    for (Token op = this.scanner.lookAhead();
            this.operators.match(op, PrecedenceLevel.DISJUNCTION);
            op = this.scanner.lookAhead()) {

        this.scanner.consume();
        final Expression rhs = this.parseSecTerm();
        final Position span = new Position(result.getPosition(), rhs.getPosition());
        result = OperatorCall.binary(span, OpType.fromToken(op), result, rhs);
    }
    return result;
}
/**
 * Parses the operators of the SECTERM precedence level. They are combined
 * left-associatively. Consecutive '>' tokens are folded into a single shift
 * operator here: two of them form a right shift, three an unsigned right shift.
 * <pre>
 * secTerm -> term (SECTERM_OP term)*   // plus minus
 * </pre>
 * @return The parsed operator call or the result from the next higher precedence
 *          level if no SECTERM_OP was found.
 * @throws ParseException If parsing fails.
 */
protected Expression parseSecTerm() throws ParseException {
    Expression result = this.parseTerm();
    for (Token op = this.scanner.lookAhead();
            this.operators.match(op, PrecedenceLevel.SECTERM);
            op = this.scanner.lookAhead()) {

        this.scanner.consume();
        // ISSUE #12: this is a (unsigned) right shift
        if (op.matches(TokenType.GT) && this.scanner.match(TokenType.GT)) {
            final TokenType shiftType = this.scanner.match(TokenType.GT)
                    ? TokenType.URIGHT_SHIFT
                    : TokenType.RIGHT_SHIFT;
            // replace the operator by one token spanning all '>' characters
            op = new Token(shiftType, this.scanner.spanFrom(op));
        }

        final Expression rhs = this.parseTerm();
        final Position span = new Position(result.getPosition(), rhs.getPosition());
        result = OperatorCall.binary(span, OpType.fromToken(op), result, rhs);
    }
    return result;
}
/**
 * Parses the operators of the TERM precedence level. They are combined
 * left-associatively. An identifier or an opening brace at operator position is
 * read as an implicit multiplication with the expression that follows.
 * <pre>
 * term -> factor (TERM_OP factor)*     // multiplication and co
 * </pre>
 * @return The parsed operator call or the result from the next higher precedence
 *          level if no TERM_OP was found.
 * @throws ParseException If parsing fails.
 */
protected Expression parseTerm() throws ParseException {
    Expression result = this.parseFactor();
    for (Token op = this.scanner.lookAhead();
            this.operators.match(op, PrecedenceLevel.TERM);
            op = this.scanner.lookAhead()) {

        // ISSUE 0000099: If Identifier or open brace, do not consume the token but
        //                pretend it was a multiplication
        final boolean implicitMul = op.matches(TokenType.IDENTIFIER)
                || op.matches(TokenType.OPENBR);
        if (implicitMul) {
            op = new Token(TokenType.MUL, op.getPosition());
        } else {
            this.scanner.consume();
        }

        final Expression rhs = this.parseFactor();
        final Position span = new Position(result.getPosition(), rhs.getPosition());
        result = OperatorCall.binary(span, OpType.fromToken(op), result, rhs);
    }
    return result;
}
/**
 * Parses the operators of the FACTOR precedence level. These operators are
 * right-associative, which is achieved by parsing the right operand with a
 * recursive call.
 * <pre>
 * factor -> postfix (FACTOR_OP factor)?    // right-associative (power operator)
 * </pre>
 * @return The parsed operator call or the result from the next higher precedence
 *          level if no FACTOR_OP was found.
 * @throws ParseException If parsing fails.
 */
protected Expression parseFactor() throws ParseException {
    final Expression base = this.parsePostfix();
    final Token op = this.scanner.lookAhead();
    if (!this.operators.match(op, PrecedenceLevel.FACTOR)) {
        return base;
    }

    this.scanner.consume();
    final Expression rhs = this.parseFactor();
    final Position span = new Position(base.getPosition(), rhs.getPosition());
    return OperatorCall.binary(span, OpType.fromToken(op), base, rhs);
}
/**
 * Parses postfix operators. An opening square bracket starts an index operator whose
 * index expression must be followed by a closing square bracket; index operators may
 * be chained. Any other postfix operator (? or ?!) is applied as unary postfix
 * operator and ends the postfix expression.
 * <pre>
 * postfix -> autolist (POSTFIX_OP autolist)*   // postfix operator
 * </pre>
 * @return Either the parsed postfix operator call or the expression from the next
 *          higher precedence level if no POSTFIX_OP was found.
 * @throws ParseException If parsing fails.
 */
protected Expression parsePostfix() throws ParseException {
    Expression operand = this.parseAutoList();
    for (Token op = this.scanner.lookAhead();
            this.operators.match(op, PrecedenceLevel.POSTFIX);
            op = this.scanner.lookAhead()) {

        this.scanner.consume();
        if (!op.matches(TokenType.OPENSQBR)) {
            // ? or ?! operator
            final Position opSpan = this.scanner.spanFrom(op);
            return OperatorCall.unary(
                    new Position(operand.getPosition(), opSpan),
                    OpType.fromToken(op), operand, true);
        }

        // index operator
        final Expression index = this.parseAutoList();
        this.expect(TokenType.CLOSEDSQBR, true);
        // NOTE(review): the position starts at the bracket, not at the operand,
        // unlike the other operator calls; confirm this is intended
        operand = OperatorCall.binary(
                this.scanner.spanFrom(op),
                OpType.fromToken(op), operand, index);
    }
    return operand;
}
/**
 * Parses an implicit list literal, that is a sequence of expressions separated by
 * semicolons without surrounding braces.
 * <pre>
 * autolist -> dotdot (';' dotdot)*     // implicit list literal
 * </pre>
 * @return Either a {@link ListLiteral} containing all parsed expressions or, if no
 *          semicolon follows the first one, the expression returned by the next
 *          higher precedence level.
 * @throws ParseException If parsing fails.
 */
protected Expression parseAutoList() throws ParseException {
    final Expression first = this.parseDotDot();
    if (!this.scanner.lookAhead().matches(TokenType.SEMICOLON)) {
        return first;
    }

    final List<Expression> elements = new ArrayList<Expression>();
    elements.add(first);
    // the look ahead was a semicolon, so the loop assigns 'last' at least once
    Expression last = null;
    while (this.scanner.match(TokenType.SEMICOLON)) {
        last = this.parseDotDot();
        elements.add(last);
    }

    final Position span = new Position(first.getPosition(), last.getPosition());
    return new ListLiteral(span, elements);
}
/**
 * Parses the '..' range operator. The resulting operator call always has three
 * operands: start, end and step size. If no step size is given using '$', a number
 * literal of 1 is used, positioned at the end operand.
 *
 * <pre>
 * dotdot -> unary ('..' unary ('$' unary)?)?   // range operator with optional
 *                                              // step size
 * </pre>
 * @return The parsed operator or the expression from the next higher precedence level
 *          if no DOTDOT operator was found.
 * @throws ParseException If parsing fails.
 */
protected Expression parseDotDot() throws ParseException {
    final Expression rangeStart = this.parseUnary();
    final Token op = this.scanner.lookAhead();
    if (!this.operators.match(op, PrecedenceLevel.DOTDOT)) {
        return rangeStart;
    }

    this.scanner.consume();
    final Expression rangeEnd = this.parseUnary();

    // default step width of 1, replaced if an explicit step follows a dollar
    Expression step = new NumberLiteral(rangeEnd.getPosition(), 1.0);
    if (this.scanner.match(TokenType.DOLLAR)) {
        step = this.parseUnary();
    }

    final Position span = new Position(rangeStart.getPosition(), step.getPosition());
    return OperatorCall.ternary(span, OpType.fromToken(op), rangeStart, rangeEnd,
            step);
}
/**
 * Parses a prefix unary operator call. Unary operators are right-associative: the
 * operand is parsed with a recursive call, so several operators may be stacked.
 * <pre>
 * unary -> UNARY_OP unary    // right-associative unary operator
 *        | call
 * </pre>
 * @return A unary operator call or the expression returned by the next higher
 *          precedence level if no UNARY_OP was found.
 * @throws ParseException If parsing fails.
 */
protected Expression parseUnary() throws ParseException {
    final Token op = this.scanner.lookAhead();
    if (!this.operators.match(op, PrecedenceLevel.UNARY)) {
        return this.parseCall();
    }

    this.scanner.consume();
    final Expression operand = this.parseUnary();
    final Position span = new Position(op.getPosition(), operand.getPosition());
    return OperatorCall.unary(span, OpType.fromToken(op), operand, false);
}
/**
 * Parses a function call. If the parsed access expression is not followed by an
 * opening brace, that expression is returned as is. Otherwise the list of
 * expressions up to the closing brace is wrapped into a {@link ProductLiteral}
 * which becomes the right hand side of the {@link Call}.
 *
 * <pre>
 * call -> access ( '(' exprList ')' )?
 * </pre>
 * @return The call expression or the result of the next higher precedence level if
 *          this was no call.
 * @throws ParseException If parsing fails
 */
protected Expression parseCall() throws ParseException {
    final Expression callee = this.parseNamespaceAccess();
    final Token brace = this.scanner.lookAhead();
    if (!this.scanner.match(TokenType.OPENBR)) {
        return callee;
    }

    final List<Expression> arguments = this.parseExpressionList(
            TokenType.CLOSEDBR);
    // the literal's span is taken before the closing brace is processed
    final ProductLiteral actual = new ProductLiteral(
            this.scanner.spanFrom(brace), arguments);
    this.expect(TokenType.CLOSEDBR, true);

    final Position span = new Position(callee.getPosition().getStart(),
            this.scanner.getStreamIndex());
    return new Call(span, callee, actual);
}
/**
 * Parses a {@link Namespace} access, that is two literals joined by a dot.
 * <pre>
 * access -> literal ('.' literal)?
 * </pre>
 *
 * @return The parsed literal if it is not followed by a dot, or a
 *          {@link NamespaceAccess} made of the literals on both sides of the dot.
 * @throws ParseException If parsing fails
 */
protected Expression parseNamespaceAccess() throws ParseException {
    final Expression namespace = this.parseLiteral();
    final Token dot = this.scanner.lookAhead();
    if (!this.scanner.match(TokenType.DOT)) {
        return namespace;
    }

    final Expression member = this.parseLiteral();
    final Position span = new Position(namespace.getPosition(),
            this.scanner.spanFrom(dot));
    return new NamespaceAccess(span, namespace, member);
}
/**
* Parses the highest precedence level which is mostly a single literal, but also
* a delete or if statement.
*
* <pre>
* literal -> ID // VarAccess
* | ESCAPED // token escape
* | '(' relation ')' // braced expression
* | '\(' parameters ':' relation ')' // lambda function literal
* | '{' exprList '}' // concrete list of expressions
* | DELETE PUBLIC? ID (',' PUBLIC? ID)* // delete operator
* | INSPECT PUBLIC ID // inspect for public
* | INSPECT ID ('.' ID)? // inspect operator
* | IF expr ':' relation ':' relation // conditional operator
* | TRUE | FALSE // boolean literal
* | CHANNEL // channel literal
* | USER // user literal
* | STRING // string literal
* | NUMBER // number literal
* | DATETIME // date literal
* | TIMESPAN // timespan literal
* | '?' // HELP literal
* | RADIX literal // radixed int
* </pre>
*
* @return The parsed expression.
* @throws ParseException If parsing fails.
*/
protected Expression parseLiteral() throws ParseException {
final Token la = this.scanner.lookAhead();
Expression exp = null;
switch(la.getType()) {
case ESCAPED:
this.scanner.consume();
final EscapedToken escaped = (EscapedToken) la;
final ResolvableIdentifier escId = new ResolvableIdentifier(la.getPosition(),
escaped.getEscaped().getStringValue(), true);
return new VarAccess(la.getPosition(), escId);
case IDENTIFIER:
this.scanner.consume();
final ResolvableIdentifier id = new ResolvableIdentifier(
la.getPosition(), la.getStringValue(), false);
return new VarAccess(id.getPosition(), id);
case OPENBR:
this.scanner.consume();
/*
* Now we can ignore whitespaces until the matching closing brace is
* read.
*/
this.enterExpression(TokenType.CLOSEDBR);
exp = this.parseRelation();
this.expect(TokenType.CLOSEDBR, true);
this.leaveExpression();
return new Braced(this.scanner.spanFrom(la), exp);
case LAMBDA:
this.scanner.consume();
this.enterExpression(TokenType.CLOSEDBR);
final Collection<Declaration> formal = this.parseParameters(
TokenType.COLON);
this.expect(TokenType.COLON, true);
exp = this.parseRelation();
this.expect(TokenType.CLOSEDBR, true);
final FunctionLiteral func = new FunctionLiteral(
this.scanner.spanFrom(la), formal, exp);
this.leaveExpression();
return func;
case OPENCURLBR:
this.scanner.consume();
this.enterExpression(TokenType.CLOSEDCURLBR);
final List<Expression> elements = this.parseExpressionList(
TokenType.CLOSEDCURLBR);
this.expect(TokenType.CLOSEDCURLBR, true);
this.leaveExpression();
final ListLiteral list = new ListLiteral(this.scanner.spanFrom(la),
elements);
list.setPosition(this.scanner.spanFrom(la));
return list;
case DELETE:
this.scanner.consume();
this.allowSingleWhiteSpace();
final List<DeleteableIdentifier> ids = new ArrayList<DeleteableIdentifier>();
do {
this.allowSingleWhiteSpace();
boolean global = this.scanner.match(TokenType.PUBLIC);
if (global) {
this.allowSingleWhiteSpace();
}
ids.add(new DeleteableIdentifier(this.expectIdentifier(), global));
} while (this.scanner.match(TokenType.COMMA));
return new Delete(this.scanner.spanFrom(la), ids);
case INSPECT:
this.scanner.consume();
this.allowSingleWhiteSpace();
final Token glob = this.scanner.lookAhead();
final boolean global = this.scanner.match(TokenType.PUBLIC);
this.allowSingleWhiteSpace();
final ResolvableIdentifier name = new ResolvableIdentifier(
this.expectIdentifier());
final VarAccess va1 = new VarAccess(name.getPosition(), name);
Expression result = va1;
if (global) {
// syntactic sugar for global inspect
final ResolvableIdentifier name2 = new ResolvableIdentifier(
glob.getPosition(), Namespace.PUBLIC_NAMESPACE_NAME);
final VarAccess va2 = new VarAccess(name2.getPosition(), name2);
result = new NamespaceAccess(this.scanner.spanFrom(la), va2, va1);
} else if (this.scanner.match(TokenType.DOT)) {
final ResolvableIdentifier name2 = new ResolvableIdentifier(
this.expectIdentifier());
final VarAccess va2 = new VarAccess(name2.getPosition(), name2);
result = new NamespaceAccess(this.scanner.spanFrom(la), va1, va2);
}
return new Inspect(this.scanner.spanFrom(la), result, global);
case IF:
this.scanner.consume();
this.allowSingleWhiteSpace();
final Expression condition = this.parseRelation();
this.allowSingleWhiteSpace();
this.expect(TokenType.COLON, true);
this.allowSingleWhiteSpace();
final Expression second = this.parseRelation();
this.allowSingleWhiteSpace();
this.expect(TokenType.COLON, true);
this.allowSingleWhiteSpace();
final Expression third = this.parseRelation();
return OperatorCall.ternary(this.scanner.spanFrom(la), OpType.IF,
condition, second, third);
case TRUE:
this.scanner.consume();
return new BooleanLiteral(la.getPosition(), true);
case FALSE:
this.scanner.consume();
return new BooleanLiteral(la.getPosition(), false);
case CHANNEL:
this.scanner.consume();
return new ChannelLiteral(la.getPosition(), la.getStringValue());
case USER:
this.scanner.consume();
return new UserLiteral(la.getPosition(), la.getStringValue());
case STRING:
this.scanner.consume();
return new StringLiteral(la.getPosition(), la.getStringValue());
case NUMBER:
this.scanner.consume();
return new NumberLiteral(la.getPosition(), la.getFloatValue());
case DATETIME:
this.scanner.consume();
return new DateLiteral(la.getPosition(), la.getDateValue());
case TIMESPAN:
this.scanner.consume();
return new TimespanLiteral(la.getPosition(), (int)la.getLongValue());
case QUESTION:
this.scanner.consume();
return new HelpLiteral(la.getPosition());
case RADIX:
this.scanner.consume();
final NumberLiteral radix = new NumberLiteral(la.getPosition(),
la.getLongValue());
final Expression rhs = this.parseLiteral();
return OperatorCall.binary(this.scanner.spanFrom(la), OpType.RADIX,
radix, rhs);
default:
this.expect(TokenType.LITERAL, true);
return new Problem(this.scanner.spanFrom(la));
}
}
/**
 * Reads a comma separated sequence of expressions. The <code>end</code> token
 * type is only peeked at in order to recognize an empty list; it is never
 * consumed by this method and must be consumed by the caller.
 *
 * <pre>
 * exprList -> end                         // empty list
 *           | relation (',' relation)*
 * </pre>
 * @param end The token which should end the list. Only used to determine empty lists.
 * @return The parsed expressions in the order they were read (empty if the next
 *          token already is <code>end</code>).
 * @throws ParseException If parsing fails.
 */
protected List<Expression> parseExpressionList(TokenType end)
        throws ParseException {
    final List<Expression> expressions;

    // only peek: the end token is consumed by the caller
    if (this.scanner.lookAhead().matches(end)) {
        // nothing between the delimiters => empty list
        expressions = new ArrayList<Expression>(0);
    } else {
        this.enterExpression(end);
        expressions = new ArrayList<Expression>();

        boolean commaFollows;
        do {
            expressions.add(this.parseRelation());
            commaFollows = this.scanner.match(TokenType.COMMA);
            if (commaFollows) {
                // a single whitespace is tolerated after each comma
                this.allowSingleWhiteSpace();
            }
        } while (commaFollows);

        this.leaveExpression();
    }
    return expressions;
}
/**
 * Reads a comma separated sequence of formal parameters which is terminated by
 * the token type <code>end</code>.
 *
 * <pre>
 * parameters -> end                           // empty list
 *             | parameter (',' parameter)*
 * </pre>
 * @param end The token that the list is supposed to end with. It is only peeked at
 *          to detect an empty list and is not consumed by this method.
 * @return The parsed formal parameters in the order they were read.
 * @throws ParseException If parsing fails.
 */
protected List<Declaration> parseParameters(TokenType end)
        throws ParseException {
    final List<Declaration> parameters;

    if (this.scanner.lookAhead().matches(end)) {
        // end token right away => no parameters at all
        parameters = new ArrayList<Declaration>(0);
    } else {
        this.enterExpression(end);
        parameters = new ArrayList<Declaration>();

        boolean commaFollows;
        do {
            parameters.add(this.parseParameter());
            commaFollows = this.scanner.match(TokenType.COMMA);
            if (commaFollows) {
                // a single whitespace is tolerated after each comma
                this.allowSingleWhiteSpace();
            }
        } while (commaFollows);

        this.leaveExpression();
    }
    return parameters;
}
/**
 * Reads a single formal parameter.
 *
 * <pre>
 * parameter -> type? ID
 * </pre>
 *
 * If the parameter consists of a lone identifier, that identifier is taken as the
 * parameter's name and a fresh type variable is used as its type. Two consecutive
 * identifiers are read as type name followed by parameter name. Anything that
 * does not start with an identifier is handed to {@link #parseType()} and must be
 * followed by the parameter's name.
 *
 * @return The parsed parameter as a declaration whose expression is an
 *          {@link Empty} carrying the parameter's type.
 * @throws ParseException If parsing fails.
 */
protected Declaration parseParameter() throws ParseException {
    final Token first = this.scanner.lookAhead();
    final Type paramType;
    final Identifier paramName;

    if (!first.matches(TokenType.IDENTIFIER)) {
        // does not start with an identifier: explicit (non-primitive) type, then name
        paramType = this.parseType();
        paramName = this.expectIdentifier();
    } else {
        this.scanner.consume();
        final boolean secondIsIdentifier =
                this.scanner.lookAhead().matches(TokenType.IDENTIFIER);

        if (secondIsIdentifier) {
            // ID ID: the first identifier names the type, the second the parameter
            final Identifier typeName = new Identifier(first.getPosition(),
                    first.getStringValue());
            paramType = this.lookupType(typeName);
            paramName = this.expectIdentifier();
        } else {
            // lone ID: untyped parameter, type is a fresh type variable
            paramType = Type.newTypeVar();
            paramName = new Identifier(first.getPosition(), first.getStringValue());
        }
    }

    return new Declaration(
            this.scanner.spanFrom(first),
            paramName,
            new Empty(paramType, this.scanner.spanFrom(first)));
}
/**
 * Parses a type expression.
 *
 * <pre>
 * type -> ID                                     // primitive type
 *       | LIST '<' type '>'                      // list type
 *       | '(' (type (WS type)*)? '->' type ')'   // function type
 *       | '?'                                    // fresh type variable
 * </pre>
 *
 * The <code>'?'</code> alternative is only recognized if
 * {@link ParserProperties#ALLOW_POLYMORPHIC_DECLS} is enabled. A function type
 * without any parameter types gets {@link Type#VOID} as its only parameter type.
 *
 * @return A resolvable type.
 * @throws ParseException If parsing fails.
 */
protected Type parseType() throws ParseException {
    if (this.scanner.match(TokenType.OPENBR)) {
        // function type: '(' (type (WS type)*)? '->' type ')'
        final List<Type> signature = new ArrayList<Type>();

        // The parameter types are separated by SEPERATOR tokens, so whitespace
        // skipping is switched off while they are read. The previous setting is
        // restored in a finally block so that a ParseException raised while
        // reading the signature does not leave the scanner with skipping
        // permanently disabled.
        final boolean skipWS = this.scanner.skipWhiteSpaces();
        this.scanner.setSkipWhiteSpaces(false);
        try {
            do {
                if (this.scanner.lookAhead().matches(TokenType.ASSIGNMENT)) {
                    // arrow reached: no (further) parameter types
                    break;
                }
                signature.add(this.parseType());
            } while (this.scanner.match(TokenType.SEPERATOR) &&
                    !this.scanner.lookAhead().matches(TokenType.ASSIGNMENT));
        } finally {
            this.scanner.setSkipWhiteSpaces(skipWS);
        }

        this.allowSingleWhiteSpace();
        this.expect(TokenType.ASSIGNMENT, true);
        final Type resultType = this.parseType();
        this.allowSingleWhiteSpace();
        this.expect(TokenType.CLOSEDBR, true);

        if (signature.isEmpty()) {
            // '(-> T)': function without parameters
            signature.add(Type.VOID);
        }
        return new ProductType(signature).mapTo(resultType);
    } else if (this.scanner.match(TokenType.LIST)) {
        // list type: LIST '<' type '>'
        this.expect(TokenType.LT, true);
        final Type subType = this.parseType();
        this.expect(TokenType.GT, true);
        return subType.listOf();
    } else if (ParserProperties.should(ParserProperties.ALLOW_POLYMORPHIC_DECLS)
            && this.scanner.match(TokenType.QUESTION)) {
        // '?': the property is checked first, so the token is only consumed
        // when polymorphic declarations are allowed
        return Type.newTypeVar();
    } else {
        // plain identifier naming a type
        return this.lookupType(this.expectIdentifier());
    }
}
}