// Copyright (c) 2011, David J. Pearce (djp@ecs.vuw.ac.nz) // All rights reserved. // // This software may be modified and distributed under the terms // of the BSD license. See the LICENSE file for details. package wyc.io; import java.io.File; import java.math.BigDecimal; import java.math.BigInteger; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import wybs.lang.Attribute; import wybs.lang.NameID; import wybs.lang.SyntacticElement; import wybs.lang.SyntaxError; import wyc.lang.*; import wyc.lang.Expr.ConstantAccess; import wyc.io.WhileyFileLexer.Token; import static wyil.util.ErrorMessages.*; import static wybs.lang.SyntaxError.*; import static wyc.io.WhileyFileLexer.Token.Kind.*; import wyc.lang.WhileyFile.*; import wycc.util.Pair; import wycc.util.Triple; import wyfs.lang.Path; import wyfs.util.Trie; import wyil.lang.Modifier; import wyil.lang.Constant; /** * Convert a list of tokens into an Abstract Syntax Tree (AST) representing the * original source file in question. No effort is made to check whether or not * the generated tree is syntactically correct. Subsequent stages of the * compiler are responsible for doing this. * * @author David J. Pearce * */ public class WhileyFileParser { private final Path.Entry<WhileyFile> entry; private ArrayList<Token> tokens; private int index; public WhileyFileParser(Path.Entry<WhileyFile> entry, List<Token> tokens) { this.entry = entry; this.tokens = new ArrayList<>(tokens); } /** * Read a <code>WhileyFile</code> from the token stream. If the stream is * invalid in some way (e.g. contains a syntax error, etc) then a * <code>SyntaxError</code> is thrown. * * @return */ public WhileyFile read() { Path.ID pkg = parsePackage(); WhileyFile wf = new WhileyFile(entry); // FIXME: check package is consistent? skipWhiteSpace(); while (index < tokens.size()) { Token lookahead = tokens.get(index); if (lookahead.kind == Import) { parseImportDeclaration(wf); } else { List<Modifier> modifiers = parseModifiers(); checkNotEof(); lookahead = tokens.get(index); if (lookahead.text.equals("type")) { parseTypeDeclaration(wf, modifiers); } else if (lookahead.text.equals("constant")) { parseConstantDeclaration(wf, modifiers); } else if (lookahead.kind == Function) { parseFunctionOrMethodDeclaration(wf, modifiers, true); } else if (lookahead.kind == Method) { parseFunctionOrMethodDeclaration(wf, modifiers, false); } else if (lookahead.kind == Property) { parsePropertyDeclaration(wf, modifiers); } else { syntaxError("unrecognised declaration", lookahead); } } skipWhiteSpace(); } return wf; } private Trie parsePackage() { Trie pkg = Trie.ROOT; if (tryAndMatch(true, Package) != null) { // found a package keyword pkg = pkg.append(match(Identifier).text); while (tryAndMatch(true, Dot) != null) { pkg = pkg.append(match(Identifier).text); } matchEndLine(); return pkg; } else { return pkg; // no package } } /** * Parse an import declaration, which is of the form: * * <pre> * ImportDecl ::= Identifier ["from" ('*' | Identifier)] ( ('.' | '..') ('*' | Identifier) )* * </pre> * * @param wf */ private void parseImportDeclaration(WhileyFile wf) { int start = index; match(Import); // First, parse "from" usage (if applicable) Token token = tryAndMatch(true, Identifier, Star); if (token == null) { syntaxError("expected identifier or '*' here", tokens.get(index)); } String name = token.text; // NOTE: we don't specify "from" as a keyword because this prevents it // from being used as a variable identifier. Token lookahead; if ((lookahead = tryAndMatchOnLine(Identifier)) != null) { // Ok, this must be "from" if (!lookahead.text.equals("from")) { syntaxError("expected \"from\" here", lookahead); } token = match(Identifier); } // Second, parse package string Trie filter = Trie.ROOT.append(token.text); token = null; while ((token = tryAndMatch(true, Dot, DotDot)) != null) { if (token.kind == DotDot) { filter = filter.append("**"); } if (tryAndMatch(true, Star) != null) { filter = filter.append("*"); } else { filter = filter.append(match(Identifier).text); } } int end = index; matchEndLine(); wf.add(new WhileyFile.Import(filter, name, sourceAttr(start, end - 1))); } private List<Modifier> parseModifiers() { ArrayList<Modifier> mods = new ArrayList<>(); Token lookahead; boolean visible = false; while ((lookahead = tryAndMatch(true, Public, Private, Native, Export)) != null) { switch (lookahead.kind) { case Public: case Private: if (visible) { syntaxError("visibility modifier already given", lookahead); } } switch (lookahead.kind) { case Public: mods.add(Modifier.PUBLIC); visible = true; break; case Private: mods.add(Modifier.PRIVATE); visible = true; break; case Native: mods.add(Modifier.NATIVE); break; case Export: mods.add(Modifier.EXPORT); break; } } return mods; } /** * Parse a <i>function declaration</i> or <i>method declaration</i>, which * have the form: * * <pre> * FunctionDeclaration ::= "function" TypePattern "->" TypePattern (FunctionMethodClause)* ':' NewLine Block * * MethodDeclaration ::= "method" TypePattern "->" TypePattern (FunctionMethodClause)* ':' NewLine Block * * FunctionMethodClause ::= "requires" Expr | "ensures" Expr * </pre> * * Here, the first type pattern (i.e. before "->") is referred to as the * "parameter", whilst the second is referred to as the "return". There are * two kinds of option clause: * * <ul> * <li><b>Requires clause</b>. This defines a constraint on the permissible * values of the parameters on entry to the function or method, and is often * referred to as the "precondition". This expression may refer to any * variables declared within the parameter type pattern. Multiple clauses * may be given, and these are taken together as a conjunction. Furthermore, * the convention is to specify the requires clause(s) before any ensure(s) * clauses.</li> * <li><b>Ensures clause</b>. This defines a constraint on the permissible * values of the the function or method's return value, and is often * referred to as the "postcondition". This expression may refer to any * variables declared within either the parameter or return type pattern. * Multiple clauses may be given, and these are taken together as a * conjunction. Furthermore, the convention is to specify the requires * clause(s) after the others.</li> * </ul> * * <p> * The following function declaration provides a small example to * illustrate: * </p> * * <pre> * function max(int x, int y) -> (int z) * // return must be greater than either parameter * ensures x <= z && y <= z * // return must equal one of the parmaeters * ensures x == z || y == z: * ... * </pre> * * <p> * Here, we see the specification for the well-known <code>max()</code> * function which returns the largest of its parameters. This does not throw * any exceptions, and does not enforce any preconditions on its parameters. * </p> */ private void parseFunctionOrMethodDeclaration(WhileyFile wf, List<Modifier> modifiers, boolean isFunction) { int start = index; EnclosingScope scope = new EnclosingScope(); List<String> lifetimeParameters; if (isFunction) { match(Function); lifetimeParameters = Collections.emptyList(); } else { match(Method); // Lifetime parameters lifetimeParameters = parseOptionalLifetimeParameters(scope); } Token name = match(Identifier); // Parse function or method parameters List<Parameter> parameters = parseParameters(wf, scope); // Parse (optional) return type List<Parameter> returns = Collections.EMPTY_LIST; if (tryAndMatch(true, MinusGreater) != null) { // Explicit return type is given, so parse it! We first clone the // environent and create a special one only for use within ensures // clauses, since these are the only expressions which may refer to // variables declared in the return type. returns = parseOptionalParameters(wf, scope); } // Parse optional requires/ensures clauses ArrayList<Expr> requires = new ArrayList<>(); ArrayList<Expr> ensures = new ArrayList<>(); Token lookahead; while ((lookahead = tryAndMatch(true, Requires, Ensures)) != null) { switch (lookahead.kind) { case Requires: // NOTE: expression terminated by ':' requires.add(parseLogicalExpression(wf, scope, true)); break; case Ensures: // Use the ensuresEnvironment here to get access to any // variables declared in the return type pattern. // NOTE: expression terminated by ':' ensures.add(parseLogicalExpression(wf, scope, true)); break; } } // At this point, we need to decide whether or there is a method body. List<Stmt> stmts; int end; if (modifiers.contains(Modifier.NATIVE)) { // This is a native function or method which does not have a body. end = index; matchEndLine(); stmts = Collections.EMPTY_LIST; } else { match(Colon); end = index; matchEndLine(); scope.declareThisLifetime(); stmts = parseBlock(wf, scope, false); } WhileyFile.Declaration declaration; if (isFunction) { declaration = wf.new Function(modifiers, name.text, returns, parameters, requires, ensures, stmts, sourceAttr(start, end - 1)); } else { declaration = wf.new Method(modifiers, name.text, returns, parameters, lifetimeParameters, requires, ensures, stmts, sourceAttr(start, end - 1)); } wf.add(declaration); } /** * Parse a <i>property declaration</i> which has the form: * * <pre> * ProeprtyDeclaration ::= "property" Parameters "->" Parameters (WhereClause)* * PropertyClause ::= "where" Expr * </pre> * */ private void parsePropertyDeclaration(WhileyFile wf, List<Modifier> modifiers) { int start = index; match(Property); Token name = match(Identifier); // EnclosingScope scope = new EnclosingScope(); List<Parameter> parameters = parseParameters(wf, scope); ArrayList<Expr> invariant = new ArrayList<>(); // Check whether or not there are optional "where" clauses. while (tryAndMatch(true, Where) != null) { invariant.add(parseLogicalExpression(wf, scope, false)); } int end = index; matchEndLine(); WhileyFile.Declaration declaration = wf.new Property(modifiers, name.text, parameters, invariant, sourceAttr(start, end - 1)); wf.add(declaration); return; } public List<Parameter> parseParameters(WhileyFile wf, EnclosingScope scope) { match(LeftBrace); ArrayList<Parameter> parameters = new ArrayList<>(); boolean firstTime = true; while (eventuallyMatch(RightBrace) == null) { if (!firstTime) { match(Comma); } firstTime = false; int pStart = index; Pair<SyntacticType, Token> p = parseMixedType(scope); Token id = p.second(); scope.declareVariable(id); parameters.add(wf.new Parameter(p.first(), id.text, sourceAttr(pStart, index - 1))); } return parameters; } public List<Parameter> parseOptionalParameters(WhileyFile wf, EnclosingScope scope) { int next = skipWhiteSpace(index); if (next < tokens.size() && tokens.get(next).kind == LeftBrace) { return parseParameters(wf, scope); } else { Parameter p = parseOptionalParameter(wf, scope); ArrayList<Parameter> ps = new ArrayList<>(); ps.add(p); return ps; } } public Parameter parseOptionalParameter(WhileyFile wf, EnclosingScope scope) { int start = index; boolean braced = false; SyntacticType type; String name; if (tryAndMatch(true, LeftBrace) != null) { Pair<SyntacticType, Token> p = parseMixedType(scope); type = p.first(); name = p.second().text; scope.declareVariable(p.second()); match(RightBrace); } else { type = parseType(scope); // The following anonymous variable name is used in order that it // can be accessed via "field aliases", which occur in the case of // record type declarations. name = "$"; } return wf.new Parameter(type, name, sourceAttr(start, index - 1)); } /** * Parse a type declaration in a Whiley source file, which has the form: * * <pre> * "type" Identifier "is" TypePattern ("where" Expr)* * </pre> * * Here, the type pattern specifies a type which may additionally be adorned * with variable names. The "where" clause is optional and is often referred * to as the type's "constraint". Variables defined within the type pattern * may be used within this constraint expressions. A simple example to * illustrate is: * * <pre> * type nat is (int x) where x >= 0 * </pre> * * Here, we are defining a <i>constrained type</i> called <code>nat</code> * which represents the set of natural numbers (i.e the non-negative * integers). Type declarations may also have modifiers, such as * <code>public</code> and <code>private</code>. * * @see wyc.lang.WhileyFile.Type * * @param wf * --- The Whiley file in which this declaration is defined. * @param modifiers * --- The list of modifiers for this declaration (which were * already parsed before this method was called). */ public void parseTypeDeclaration(WhileyFile wf, List<Modifier> modifiers) { int start = index; // Match identifier rather than kind e.g. Type to avoid "type" being a // keyword. match(Identifier); // Token name = match(Identifier); match(Is); // Parse the type pattern EnclosingScope scope = new EnclosingScope(); Parameter p = parseOptionalParameter(wf, scope); addFieldAliases(p, scope); ArrayList<Expr> invariant = new ArrayList<>(); // Check whether or not there is an optional "where" clause. while (tryAndMatch(true, Where) != null) { // Yes, there is a "where" clause so parse the constraint. First, // construct the environment which will be used to identify the set // of declared variables in the current scope. invariant.add(parseLogicalExpression(wf, scope, false)); } int end = index; matchEndLine(); WhileyFile.Declaration declaration = wf.new Type(modifiers, p, name.text, invariant, sourceAttr(start, end - 1)); wf.add(declaration); return; } /** * In the special case of a record type declaration, those fields contained * in the record are registered as "field aliases". This means they can be * referred to directly from the type invariant, rather than requiring an * additional variable be declared. For example, the following is permitted: * * <pre> * type Point is {int x, int y} where x >= 0 && y >= 0 * </pre> * * Here, <code>x</code> and <code>y</code> are "field aliases" within the * scope of the type invariant. In essence, what happens is that the above * is silently transformed into the following: * * <pre> * type Point is ({int x, int y} $) where $.x >= 0 && $.y >= 0 * </pre> * * The anonymous variable name <code>$</code> is chosen because it cannot * conflict with a declared variable in the program source (i.e. it is not a * valid variable identifier). * * @param p * @param scope */ private void addFieldAliases(Parameter p, EnclosingScope scope) { SyntacticType t = p.type; if(t instanceof SyntacticType.Record) { // This is currently the only situation in which field aliases can // arise. SyntacticType.Record r = (SyntacticType.Record) t; for(Map.Entry<String, SyntacticType> e : r.types.entrySet()) { scope.declareFieldAlias(e.getKey()); } } } /** * Parse a constant declaration in a Whiley source file, which has the form: * * <pre> * ConstantDeclaration ::= "constant" Identifier "is"Expr * </pre> * * A simple example to illustrate is: * * <pre> * constant PI is 3.141592654 * </pre> * * Here, we are defining a constant called <code>PI</code> which represents * the decimal value "3.141592654". Constant declarations may also have * modifiers, such as <code>public</code> and <code>private</code>. * * @see wyc.lang.WhileyFile.Constant * * @param wf * --- The Whiley file in which this declaration is defined. * @param modifiers * --- The list of modifiers for this declaration (which were * already parsed before this method was called). */ private void parseConstantDeclaration(WhileyFile wf, List<Modifier> modifiers) { int start = index; // Match identifier rather than kind e.g. constant to avoid "constant" // being a // keyword. match(Identifier); // Token name = match(Identifier); match(Is); Expr e = parseExpression(wf, new EnclosingScope(), false); int end = index; matchEndLine(); WhileyFile.Declaration declaration = wf.new Constant(modifiers, e, name.text, sourceAttr(start, end - 1)); wf.add(declaration); } /** * Parse a block of zero or more statements which share the same indentation * level. Their indentation level must be strictly greater than that of * their parent, otherwise the end of block is signaled. The <i>indentation * level</i> for the block is set by the first statement encountered * (assuming their is one). An error occurs if a subsequent statement is * reached with an indentation level <i>greater</i> than the block's * indentation level. * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param parentIndent * The indentation level of the parent, for which all statements * in this block must have a greater indent. May not be * <code>null</code>. * @param isLoop * Indicates whether or not this block represents the body of a * loop. This is important in order to setup the scope for this * block appropriately. * @return */ private List<Stmt> parseBlock(WhileyFile wf, EnclosingScope scope, boolean isLoop) { // First, determine the initial indentation of this block based on the // first statement (or null if there is no statement). Indent indent = getIndent(); // We must clone the environment here, in order to ensure variables // declared within this block are properly scoped. EnclosingScope blockScope = scope.newEnclosingScope(indent, isLoop); // Second, check that this is indeed the initial indentation for this // block (i.e. that it is strictly greater than parent indent). if (indent == null || indent.lessThanEq(scope.getIndent())) { // Initial indent either doesn't exist or is not strictly greater // than parent indent and,therefore, signals an empty block. // return Collections.EMPTY_LIST; } else { // Initial indent is valid, so we proceed parsing statements with // the appropriate level of indent. // ArrayList<Stmt> stmts = new ArrayList<>(); Indent nextIndent; while ((nextIndent = getIndent()) != null && indent.lessThanEq(nextIndent)) { // At this point, nextIndent contains the indent of the current // statement. However, this still may not be equivalent to this // block's indentation level. // First, check the indentation matches that for this block. if (!indent.equivalent(nextIndent)) { // No, it's not equivalent so signal an error. syntaxError("unexpected end-of-block", nextIndent); } // Second, parse the actual statement at this point! stmts.add(parseStatement(wf, blockScope)); } return stmts; } } /** * Determine the indentation as given by the Indent token at this point (if * any). If none, then <code>null</code> is returned. * * @return */ private Indent getIndent() { skipEmptyLines(); if (index < tokens.size()) { Token token = tokens.get(index); if (token.kind == Indent) { return new Indent(token.text, token.start); } return null; } return null; } /** * Parse a given statement. There are essentially two forms of statement: * <code>simple</code> and <code>compound</code>. Simple statements (e.g. * assignment, <code>debug</code>, etc) are terminated by a * <code>NewLine</code> token, although they may span multiple lines if an * expression does. Compound statements (e.g. <code>if</code>, * <code>while</code>, etc) themselves contain blocks of statements and are * not (generally) terminated by a <code>NewLine</code>. * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @return */ private Stmt parseStatement(WhileyFile wf, EnclosingScope scope) { checkNotEof(); Token lookahead = tokens.get(index); // First, attempt to parse the easy statement forms. switch (lookahead.kind) { case Assert: return parseAssertStatement(wf, scope); case Assume: return parseAssumeStatement(wf, scope); case Break: return parseBreakStatement(scope); case Continue: return parseContinueStatement(scope); case Do: return parseDoWhileStatement(wf, scope); case Debug: return parseDebugStatement(wf, scope); case Fail: return parseFailStatement(scope); case If: return parseIfStatement(wf, scope); case Return: return parseReturnStatement(wf, scope); case While: return parseWhileStatement(wf, scope); case Skip: return parseSkipStatement(scope); case Switch: return parseSwitchStatement(wf, scope); default: // fall through to the more difficult cases } // At this point, we have three possibilities remaining: variable // declaration, invocation, assignment, or a named block. // The latter one can be detected easily as it is just an identifier // followed by a colon. To disambiguate the remaining cases, we // first determine whether or not what follows *must* be parsed as a // type (i.e. parsing it as an expression would fail). If so, then it // must be a variable declaration that follows. Otherwise, it can still // be *any* of the three forms, but we definitely have an // expression-like thing at this point. Therefore, we parse that // expression and see what this gives and/or what follows... return parseHeadlessStatement(wf, scope); } /** * A headless statement is one which has no identifying keyword. The set of * headless statements include assignments, invocations, variable * declarations and named blocks. * * @param wf * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @return */ private Stmt parseHeadlessStatement(WhileyFile wf, EnclosingScope scope) { int start = index; // See if it is a named block Token blockName = tryAndMatch(true, Identifier); if (blockName != null) { if (tryAndMatch(true, Colon) != null && isAtEOL()) { int end = index; matchEndLine(); scope = scope.newEnclosingScope(); scope.declareLifetime(blockName); List<Stmt> body = parseBlock(wf, scope, false); return new Stmt.NamedBlock(blockName.text, body, sourceAttr(start, end - 1)); } else { index = start; // backtrack } } // Remaining cases: assignments, invocations and variable declarations SyntacticType type = parseDefiniteType(scope); if (type == null) { // Can still be a variable declaration, assignment or invocation. Expr e = parseExpression(wf, scope, false); if (e instanceof Expr.AbstractInvoke || e instanceof Expr.AbstractIndirectInvoke) { // Must be an invocation since these are neither valid // lvals (i.e. they cannot be assigned) nor types. matchEndLine(); return (Stmt) e; } else if (tryAndMatch(true, Equals) != null) { // Must be an assignment a valid type cannot be followed by "=" // on its own. Therefore, we backtrack and attempt to parse the // expression as an lval (i.e. as part of an assignment // statement). index = start; // backtrack // return parseAssignmentStatement(wf, scope); } else if (tryAndMatch(true, Comma) != null) { // Must be an multi-assignment index = start; // backtrack // return parseAssignmentStatement(wf, scope); } else { // At this point, we must be left with a variable declaration. // Therefore, we backtrack and parse the expression again as a // type. index = start; // backtrack type = parseType(scope); } } // Must be a variable declaration here. Token name = match(Identifier); WhileyFile.Parameter decl = wf.new Parameter(type, name.text, sourceAttr(start, index - 1)); return parseVariableDeclaration(start, decl, wf, scope); } /** * Parse a variable declaration statement which has the form: * * <pre> * Type Identifier ['=' Expr] NewLine * </pre> * * The optional <code>Expression</code> assignment is referred to as an * <i>initialiser</i>. * * @param parameter * The declared type for the variable, which will have already * been parsed when disambiguating this statement from another. * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @see wyc.lang.Stmt.VariableDeclaration * * @return */ private Stmt.VariableDeclaration parseVariableDeclaration(int start, Parameter parameter, WhileyFile wf, EnclosingScope scope) { // Ensure at least one variable is defined by this pattern. // Check that declared variables are not already defined. scope.checkNameAvailable(parameter); // A variable declaration may optionally be assigned an initialiser // expression. Expr initialiser = null; if (tryAndMatch(true, Token.Kind.Equals) != null) { initialiser = parseExpression(wf, scope, false); } // Now, a new line indicates the end-of-statement int end = index; matchEndLine(); // Finally, register the new variable in the enclosing scope. This // should be done after parsing the initialiser expression to prevent it // from referring to this variable. scope.declareVariable(parameter); // Done. return new Stmt.VariableDeclaration(parameter, initialiser, sourceAttr(start, end - 1)); } /** * Parse a return statement, which has the form: * * <pre> * ReturnStmt ::= "return" [Expr] NewLine * </pre> * * The optional expression is referred to as the <i>return value</i>. Note * that, the returned expression (if there is one) must begin on the same * line as the return statement itself. * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @see wyc.lang.Stmt.Return * @return */ private Stmt.Return parseReturnStatement(WhileyFile wf, EnclosingScope scope) { int start = index; match(Return); // A return statement may optionally have one or more return // expressions. Therefore, we first skip all whitespace on the given // line. int next = skipLineSpace(index); // Then, we check whether or not we reached the end of the line. If not, // then we assume what's remaining is the returned expression. This // means expressions must start on the same line as a return. Otherwise, // a potentially cryptic error message will be given. List<Expr> returns = Collections.EMPTY_LIST; if (next < tokens.size() && tokens.get(next).kind != NewLine) { returns = parseExpressions(wf, scope, false); } // Finally, at this point we are expecting a new-line to signal the // end-of-statement. int end = index; matchEndLine(); // Done. return new Stmt.Return(returns, sourceAttr(start, end - 1)); } /** * Parse an assert statement, which is of the form: * * <pre> * AssertStmt ::= "assert" Expr * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @see wyc.lang.Stmt.Assert * @return */ private Stmt.Assert parseAssertStatement(WhileyFile wf, EnclosingScope scope) { int start = index; // Match the assert keyword match(Assert); // Parse the expression to be printed Expr e = parseLogicalExpression(wf, scope, false); // Finally, at this point we are expecting a new-line to signal the // end-of-statement. int end = index; matchEndLine(); // Done. return new Stmt.Assert(e, sourceAttr(start, end - 1)); } /** * Parse an assume statement, which is of the form: * * <pre> * AssumeStmt ::= "assume" Expr * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @see wyc.lang.Stmt.Assume * @return */ private Stmt.Assume parseAssumeStatement(WhileyFile wf, EnclosingScope scope) { int start = index; // Match the assume keyword match(Assume); // Parse the expression to be printed Expr e = parseLogicalExpression(wf, scope, false); // Finally, at this point we are expecting a new-line to signal the // end-of-statement. int end = index; matchEndLine(); // Done. return new Stmt.Assume(e, sourceAttr(start, end - 1)); } /** * Parse a break statement, which is of the form: * * <pre> * BreakStmt ::= "break" * </pre> * * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @see wyc.lang.Stmt.Break * @return */ private Stmt.Break parseBreakStatement(EnclosingScope scope) { int start = index; // Match the break keyword Token t = match(Break); int end = index; matchEndLine(); // Check that break statement makes sense at this point. if (!scope.isInLoop()) { syntaxError(errorMessage(BREAK_OUTSIDE_SWITCH_OR_LOOP), t); } // Done. return new Stmt.Break(sourceAttr(start, end - 1)); } /** * Parse a continue statement, which is of the form: * * <pre> * ContinueStmt ::= "continue" * </pre> * * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @see wyc.lang.Stmt.Continue * @return */ private Stmt.Continue parseContinueStatement(EnclosingScope scope) { int start = index; // Match the continue keyword Token t = match(Continue); int end = index; matchEndLine(); // Check that continue statement makes sense at this point. if (!scope.isInLoop()) { syntaxError(errorMessage(CONTINUE_OUTSIDE_LOOP), t); } // Done. return new Stmt.Continue(sourceAttr(start, end - 1)); } /** * Parse a debug statement, which is of the form: * * <pre> * DebugStmt ::= "debug" Expr * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @see wyc.lang.Stmt.Debug * @return */ private Stmt.Debug parseDebugStatement(WhileyFile wf, EnclosingScope scope) { int start = index; // Match the debug keyword match(Debug); // Parse the expression to be printed Expr e = parseExpression(wf, scope, false); // Finally, at this point we are expecting a new-line to signal the // end-of-statement. int end = index; matchEndLine(); // Done. return new Stmt.Debug(e, sourceAttr(start, end - 1)); } /** * Parse a do-while statement, which has the form: * * <pre> * DoWhileStmt ::= "do" ':' NewLine Block "where" Expr ("where" Expr)* * </pre> * * @see wyc.lang.Stmt.DoWhile * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @return * @author David J. Pearce * */ private Stmt parseDoWhileStatement(WhileyFile wf, EnclosingScope scope) { int start = index; match(Do); match(Colon); int end = index; matchEndLine(); // match the block List<Stmt> blk = parseBlock(wf, scope, true); // match while and condition match(While); Expr condition = parseLogicalExpression(wf, scope, false); // Parse the loop invariants List<Expr> invariants = new ArrayList<>(); while (tryAndMatch(true, Where) != null) { invariants.add(parseLogicalExpression(wf, scope, false)); } matchEndLine(); return new Stmt.DoWhile(condition, invariants, blk, sourceAttr(start, end - 1)); } /** * Parse a fail statement, which is of the form: * * <pre> * FailStmt ::= "fail" * </pre> * * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @see wyc.lang.Stmt.Fail * @return */ private Stmt.Fail parseFailStatement(EnclosingScope scope) { int start = index; // Match the fail keyword match(Fail); int end = index; matchEndLine(); // Done. return new Stmt.Fail(sourceAttr(start, end - 1)); } /** * Parse a classical if-else statement, which is has the form: * * <pre> * "if" Expr ':' NewLine Block ["else" ':' NewLine Block] * </pre> * * The first expression is referred to as the <i>condition</i>, while the * first block is referred to as the <i>true branch</i>. The optional second * block is referred to as the <i>false branch</i>. * * @see wyc.lang.Stmt.IfElse * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @return */ private Stmt.IfElse parseIfStatement(WhileyFile wf, EnclosingScope scope) { int start = index; // An if statement begins with the keyword "if", followed by an // expression representing the condition. match(If); // NOTE: expression terminated by ':' Expr c = parseLogicalExpression(wf, scope, true); // The a colon to signal the start of a block. match(Colon); matchEndLine(); int end = index; // First, parse the true branch, which is required List<Stmt> tblk = parseBlock(wf, scope, scope.isInLoop()); // Second, attempt to parse the false branch, which is optional. List<Stmt> fblk = Collections.emptyList(); if (tryAndMatchAtIndent(true, scope.getIndent(), Else) != null) { int if_start = index; if (tryAndMatch(true, If) != null) { // This is an if-chain, so backtrack and parse a complete If index = if_start; fblk = new ArrayList<>(); fblk.add(parseIfStatement(wf, scope)); } else { match(Colon); matchEndLine(); fblk = parseBlock(wf, scope, scope.isInLoop()); } } // Done! return new Stmt.IfElse(c, tblk, fblk, sourceAttr(start, end - 1)); } /** * Parse a while statement, which has the form: * * <pre> * WhileStmt ::= "while" Expr ("where" Expr)* ':' NewLine Block * </pre> * * @see wyc.lang.Stmt.While * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @return * @author David J. Pearce * */ private Stmt parseWhileStatement(WhileyFile wf, EnclosingScope scope) { int start = index; match(While); // NOTE: expression terminated by ':' Expr condition = parseLogicalExpression(wf, scope, true); // Parse the loop invariants List<Expr> invariants = new ArrayList<>(); while (tryAndMatch(true, Where) != null) { // NOTE: expression terminated by ':' invariants.add(parseLogicalExpression(wf, scope, true)); } match(Colon); int end = index; matchEndLine(); List<Stmt> blk = parseBlock(wf, scope, true); return new Stmt.While(condition, invariants, blk, sourceAttr(start, end - 1)); } /** * Parse a skip statement, which is of the form: * * <pre> * SkipStmt ::= "skip" * </pre> * * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @see wyc.lang.Stmt.Skip * @return */ private Stmt.Skip parseSkipStatement(EnclosingScope scope) { int start = index; // Match the break keyword match(Skip); int end = index; matchEndLine(); // Done. return new Stmt.Skip(sourceAttr(start, end - 1)); } /** * Parse a switch statement, which has the form: * * <pre> * SwitchStmt ::= "switch" Expr ':' NewLine CaseStmt+ * * CaseStmt ::= "case" UnitExpr (',' UnitExpr)* ':' NewLine Block * </pre> * * @see wyc.lang.Stmt.Switch * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @return * @author David J. Pearce * */ private Stmt parseSwitchStatement(WhileyFile wf, EnclosingScope scope) { int start = index; match(Switch); // NOTE: expression terminated by ':' Expr condition = parseExpression(wf, scope, true); match(Colon); int end = index; matchEndLine(); // Match case block List<Stmt.Case> cases = parseCaseBlock(wf, scope); // Done return new Stmt.Switch(condition, cases, sourceAttr(start, end - 1)); } /** * Parse a block of zero or more case statements which share the same * indentation level. Their indentation level must be strictly greater than * that of their parent, otherwise the end of block is signalled. The * <i>indentation level</i> for the block is set by the first statement * encountered (assuming their is one). An error occurs if a subsequent * statement is reached with an indentation level <i>greater</i> than the * block's indentation level. * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @return */ private List<Stmt.Case> parseCaseBlock(WhileyFile wf, EnclosingScope scope) { // First, determine the initial indentation of this block based on the // first statement (or null if there is no statement). Indent indent = getIndent(); // We must create a new scope to ensure variables declared within this // block are not visible in the enclosing scope. EnclosingScope caseScope = scope.newEnclosingScope(indent); // Second, check that this is indeed the initial indentation for this // block (i.e. that it is strictly greater than parent indent). if (indent == null || indent.lessThanEq(scope.getIndent())) { // Initial indent either doesn't exist or is not strictly greater // than parent indent and,therefore, signals an empty block. // return Collections.EMPTY_LIST; } else { // Initial indent is valid, so we proceed parsing case statements // with the appropriate level of indent. // ArrayList<Stmt.Case> cases = new ArrayList<>(); Indent nextIndent; while ((nextIndent = getIndent()) != null && indent.lessThanEq(nextIndent)) { // At this point, nextIndent contains the indent of the current // statement. However, this still may not be equivalent to this // block's indentation level. // First, check the indentation matches that for this block. if (!indent.equivalent(nextIndent)) { // No, it's not equivalent so signal an error. syntaxError("unexpected end-of-block", indent); } // Second, parse the actual case statement at this point! cases.add(parseCaseStatement(wf, caseScope)); } checkForDuplicateDefault(cases); return cases; } } /** * Check whether we have a duplicate default statement, or a case which * occurs after a default statement (and, hence, is unreachable). * * @param cases */ private void checkForDuplicateDefault(List<Stmt.Case> cases) { boolean hasDefault = false; for (Stmt.Case c : cases) { if (c.expr.size() > 0 && hasDefault) { syntaxError(errorMessage(UNREACHABLE_CODE), c); } else if (c.expr.size() == 0 && hasDefault) { syntaxError(errorMessage(DUPLICATE_DEFAULT_LABEL), c); } else { hasDefault = c.expr.size() == 0; } } } /** * Parse a case Statement, which has the form: * * <pre> * CaseStmt ::= "case" NonTupleExpr (',' NonTupleExpression)* ':' NewLine Block * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @return */ private Stmt.Case parseCaseStatement(WhileyFile wf, EnclosingScope scope) { int start = index; List<Expr> values; if (tryAndMatch(true, Default) != null) { values = Collections.EMPTY_LIST; } else { match(Case); // Now, parse one or more constant expressions values = new ArrayList<>(); do { // NOTE: expression terminated by ':' values.add(parseExpression(wf, scope, true)); } while (tryAndMatch(true, Comma) != null); } match(Colon); int end = index; matchEndLine(); List<Stmt> stmts = parseBlock(wf, scope, scope.isInLoop()); return new Stmt.Case(values, stmts, sourceAttr(start, end - 1)); } /** * Parse an assignment statement, which has the form: * * <pre> * AssignStmt ::= LVal '=' Expr * </pre> * * Here the <code>lhs</code> must be an <code>LVal</code> --- that is, an * expression permitted on the left-side of an assignment. The following * illustrates different possible assignment statements: * * <pre> * x = y // variable assignment * x,y = z // multi-assignment * x.f = y // field assignment * x[i] = y // array assignment * x[i].f = y // compound assignment * </pre> * * The last assignment here illustrates that the left-hand side of an * assignment can be arbitrarily complex, involving nested assignments into * arrays and records. * * @see wyc.lang.Stmt.Assign * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @return */ private Stmt parseAssignmentStatement(WhileyFile wf, EnclosingScope scope) { int start = index; List<Expr.LVal> lvals = parseLVals(wf, scope); match(Equals); List<Expr> rvals = parseExpressions(wf, scope, false); int end = index; matchEndLine(); return new Stmt.Assign(lvals, rvals, sourceAttr(start, end - 1)); } /** * Parse an "lval" expression, which is a subset of the possible expressions * forms permitted on the left-hand side of an assignment. LVals are of the * form: * * <pre> * LVal ::= LValTerm (',' LValTerm)* ')' * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @return */ private List<Expr.LVal> parseLVals(WhileyFile wf, EnclosingScope scope) { int start = index; ArrayList<Expr.LVal> elements = new ArrayList<>(); elements.add(parseLVal(index, wf, scope)); // Check whether we have a multiple lvals or not while (tryAndMatch(true, Comma) != null) { // Add all expressions separated by a comma elements.add(parseLVal(index, wf, scope)); // Done } return elements; } private Expr.LVal parseLVal(int start, WhileyFile wf, EnclosingScope scope) { return parseAccessLVal(start, wf, scope); } /** * Parse an access lval, which is of the form: * * <pre> * AccessLVal ::= TermLVal * | AccessLVal '.' Identifier // Field assignment * | AccessLVal '->' Identifier // dereference field assigmment * | '*' AccessLVal // dereference assigmment * | AccessLVal '[' Expr ']' // index assigmment * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @return */ private Expr.LVal parseAccessLVal(int start, WhileyFile wf, EnclosingScope scope) { Expr.LVal lhs = parseLValTerm(start, wf, scope); Token token; while ((token = tryAndMatchOnLine(LeftSquare)) != null || (token = tryAndMatch(true, Dot, MinusGreater)) != null) { switch (token.kind) { case LeftSquare: // NOTE: expression is terminated by ']' Expr rhs = parseAdditiveExpression(wf, scope, true); match(RightSquare); lhs = new Expr.IndexOf(lhs, rhs, sourceAttr(start, index - 1)); break; case MinusGreater: lhs = new Expr.Dereference(lhs, sourceAttr(start, index - 1)); // Fall Through case Dot: String name = match(Identifier).text; lhs = new Expr.FieldAccess(lhs, name, sourceAttr(start, index - 1)); break; } } return lhs; } /** * Parse an lval term, which is of the form: * * <pre> * TermLVal ::= Identifier // Variable assignment * | '(' LVal ')' // Bracketed assignment * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @return */ private Expr.LVal parseLValTerm(int start, WhileyFile wf, EnclosingScope scope) { checkNotEof(); // First, attempt to disambiguate the easy forms: Token lookahead = tokens.get(index); switch (lookahead.kind) { case Identifier: match(Identifier); return new Expr.AssignedVariable(lookahead.text, sourceAttr(start, index - 1)); case LeftBrace: { match(LeftBrace); Expr.LVal lval = parseLVal(start, wf, scope); match(RightBrace); return lval; } case Star: { match(Star); Expr.LVal lval = parseLVal(start, wf, scope); return new Expr.Dereference(lval, sourceAttr(start, index - 1)); } default: syntaxError("unrecognised lval", lookahead); return null; // dead-code } } /** * Parse a "multi-expression"; that is, a sequence of one or more * expressions separated by comma's * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * @return */ public List<Expr> parseExpressions(WhileyFile wf, EnclosingScope scope, boolean terminated) { ArrayList<Expr> returns = new ArrayList<>(); // A return statement may optionally have a return expression. // Therefore, we first skip all whitespace on the given line. int next = skipLineSpace(index); // Then, we check whether or not we reached the end of the line. If not, // then we assume what's remaining is the returned expression. This // means expressions must start on the same line as a return. Otherwise, // a potentially cryptic error message will be given. returns.add(parseExpression(wf, scope, terminated)); while (tryAndMatch(false, Comma) != null) { returns.add(parseExpression(wf, scope, terminated)); } return returns; } /** * Parse a unit expression, which has the form: * * <pre> * UnitExpr::= LogicalExpression * </pre> * * <p> * A unit expression is essentially any expression, except that it is not * allowed to be a tuple expression. More specifically, it cannot be * followed by ',' (e.g. because the enclosing context uses ','). * </p> * * <p> * As an example consider a record expression, such as * <code>{x: e1, y: e2}</code>. Here, the sub-expression "e1" must be a * non-tuple expression since it is followed by ',' to signal the start of * the next field "y". Of course, e1 can be a tuple expression if we use * brackets as these help disambiguate the context. * </p> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * @return */ private Expr parseExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { return parseLogicalExpression(wf, scope, terminated); } /** * Parse a logical expression of the form: * * <pre> * Expr ::= AndOrExpr [ "==>" UnitExpr] * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseLogicalExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { checkNotEof(); int start = index; Expr lhs = parseAndOrExpression(wf, scope, terminated); Token lookahead = tryAndMatch(terminated, LogicalImplication, LogicalIff); if (lookahead != null) { switch (lookahead.kind) { case LogicalImplication: { Expr rhs = parseExpression(wf, scope, terminated); // FIXME: this is something of a hack, although it does work. It // would be nicer to have a binary expression kind for logical // implication. lhs = new Expr.UnOp(Expr.UOp.NOT, lhs, sourceAttr(start, index - 1)); // return new Expr.BinOp(Expr.BOp.OR, lhs, rhs, sourceAttr(start, index - 1)); } case LogicalIff: { Expr rhs = parseExpression(wf, scope, terminated); // FIXME: this is something of a hack, although it does work. It // would be nicer to have a binary expression kind for logical // implication. Expr nlhs = new Expr.UnOp(Expr.UOp.NOT, lhs, sourceAttr(start, index - 1)); Expr nrhs = new Expr.UnOp(Expr.UOp.NOT, rhs, sourceAttr(start, index - 1)); // nlhs = new Expr.BinOp(Expr.BOp.AND, nlhs, nrhs, sourceAttr(start, index - 1)); nrhs = new Expr.BinOp(Expr.BOp.AND, lhs, rhs, sourceAttr(start, index - 1)); // return new Expr.BinOp(Expr.BOp.OR, nlhs, nrhs, sourceAttr(start, index - 1)); } default: throw new RuntimeException("deadcode"); // dead-code } } return lhs; } /** * Parse a logical expression of the form: * * <pre> * Expr ::= ConditionExpr [ ( "&&" | "||" ) Expr] * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseAndOrExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { checkNotEof(); int start = index; Expr lhs = parseBitwiseOrExpression(wf, scope, terminated); Token lookahead = tryAndMatch(terminated, LogicalAnd, LogicalOr); if (lookahead != null) { Expr.BOp bop; switch (lookahead.kind) { case LogicalAnd: bop = Expr.BOp.AND; break; case LogicalOr: bop = Expr.BOp.OR; break; default: throw new RuntimeException("deadcode"); // dead-code } Expr rhs = parseExpression(wf, scope, terminated); return new Expr.BinOp(bop, lhs, rhs, sourceAttr(start, index - 1)); } return lhs; } /** * Parse an bitwise "inclusive or" expression * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseBitwiseOrExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; Expr lhs = parseBitwiseXorExpression(wf, scope, terminated); if (tryAndMatch(terminated, VerticalBar) != null) { Expr rhs = parseExpression(wf, scope, terminated); return new Expr.BinOp(Expr.BOp.BITWISEOR, lhs, rhs, sourceAttr(start, index - 1)); } return lhs; } /** * Parse an bitwise "exclusive or" expression * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseBitwiseXorExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; Expr lhs = parseBitwiseAndExpression(wf, scope, terminated); if (tryAndMatch(terminated, Caret) != null) { Expr rhs = parseExpression(wf, scope, terminated); return new Expr.BinOp(Expr.BOp.BITWISEXOR, lhs, rhs, sourceAttr(start, index - 1)); } return lhs; } /** * Parse an bitwise "and" expression * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseBitwiseAndExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; Expr lhs = parseConditionExpression(wf, scope, terminated); if (tryAndMatch(terminated, Ampersand) != null) { Expr rhs = parseExpression(wf, scope, terminated); return new Expr.BinOp(Expr.BOp.BITWISEAND, lhs, rhs, sourceAttr(start, index - 1)); } return lhs; } /** * Parse a condition expression. * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseConditionExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; Token lookahead; // First, attempt to parse quantifiers (e.g. some, all, no, etc) if ((lookahead = tryAndMatch(terminated, Some, All)) != null) { return parseQuantifierExpression(lookahead, wf, scope, terminated); } Expr lhs = parseShiftExpression(wf, scope, terminated); lookahead = tryAndMatch(terminated, LessEquals, LeftAngle, GreaterEquals, RightAngle, EqualsEquals, NotEquals, Is, Subset, SubsetEquals, Superset, SupersetEquals); if (lookahead != null) { Expr.BOp bop; switch (lookahead.kind) { case LessEquals: bop = Expr.BOp.LTEQ; break; case LeftAngle: bop = Expr.BOp.LT; break; case GreaterEquals: bop = Expr.BOp.GTEQ; break; case RightAngle: bop = Expr.BOp.GT; break; case EqualsEquals: bop = Expr.BOp.EQ; break; case NotEquals: bop = Expr.BOp.NEQ; break; case Is: SyntacticType type = parseType(scope); Expr.TypeVal rhs = new Expr.TypeVal(type, sourceAttr(start, index - 1)); return new Expr.BinOp(Expr.BOp.IS, lhs, rhs, sourceAttr(start, index - 1)); default: throw new RuntimeException("deadcode"); // dead-code } Expr rhs = parseShiftExpression(wf, scope, terminated); return new Expr.BinOp(bop, lhs, rhs, sourceAttr(start, index - 1)); } return lhs; } /** * Parse a quantifier expression, which is of the form: * * <pre> * QuantExpr ::= ("no" | "some" | "all") * '{' * Identifier "in" Expr (',' Identifier "in" Expr)+ * '|' LogicalExpr * '}' * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * @return */ private Expr parseQuantifierExpression(Token lookahead, WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index - 1; // Determine the quantifier operation Expr.QOp cop; switch (lookahead.kind) { case Some: cop = Expr.QOp.SOME; break; case All: cop = Expr.QOp.ALL; break; default: cop = null; // deadcode } match(LeftCurly); // Parse one or more source variables / expressions scope = scope.newEnclosingScope(); List<Triple<String, Expr, Expr>> srcs = new ArrayList<>(); boolean firstTime = true; do { if (!firstTime) { match(Comma); } firstTime = false; Token id = match(Identifier); scope.checkNameAvailable(id); match(In); Expr lhs = parseAdditiveExpression(wf, scope, terminated); match(DotDot); Expr rhs = parseAdditiveExpression(wf, scope, terminated); srcs.add(new Triple<>(id.text, lhs, rhs)); scope.declareVariable(id); } while (eventuallyMatch(VerticalBar) == null); // Parse condition over source variables Expr condition = parseLogicalExpression(wf, scope, terminated); match(RightCurly); // Done return new Expr.Quantifier(cop, srcs, condition, sourceAttr(start, index - 1)); } /** * Parse a range expression, which has the form: * * <pre> * RangeExpr ::= ShiftExpr [ ".." ShiftExpr ] * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseRangeExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; Expr lhs = parseShiftExpression(wf, scope, terminated); if (tryAndMatch(terminated, DotDot) != null) { Expr rhs = parseAdditiveExpression(wf, scope, terminated); return new Expr.BinOp(Expr.BOp.RANGE, lhs, rhs, sourceAttr(start, index - 1)); } return lhs; } /** * Parse a shift expression, which has the form: * * <pre> * ShiftExpr ::= AdditiveExpr [ ( "<<" | ">>" ) AdditiveExpr ] * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseShiftExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; Expr lhs = parseAdditiveExpression(wf, scope, terminated); Token lookahead; while ((lookahead = tryAndMatch(terminated, LeftAngleLeftAngle, RightAngleRightAngle)) != null) { Expr rhs = parseAdditiveExpression(wf, scope, terminated); Expr.BOp bop = null; switch (lookahead.kind) { case LeftAngleLeftAngle: bop = Expr.BOp.LEFTSHIFT; break; case RightAngleRightAngle: bop = Expr.BOp.RIGHTSHIFT; break; } lhs = new Expr.BinOp(bop, lhs, rhs, sourceAttr(start, index - 1)); } return lhs; } /** * Parse an additive expression. * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseAdditiveExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; Expr lhs = parseMultiplicativeExpression(wf, scope, terminated); Token lookahead; while ((lookahead = tryAndMatch(terminated, Plus, Minus)) != null) { Expr.BOp bop; switch (lookahead.kind) { case Plus: bop = Expr.BOp.ADD; break; case Minus: bop = Expr.BOp.SUB; break; default: throw new RuntimeException("deadcode"); // dead-code } Expr rhs = parseMultiplicativeExpression(wf, scope, terminated); lhs = new Expr.BinOp(bop, lhs, rhs, sourceAttr(start, index - 1)); } return lhs; } /** * Parse a multiplicative expression. * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseMultiplicativeExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; Expr lhs = parseAccessExpression(wf, scope, terminated); Token lookahead = tryAndMatch(terminated, Star, RightSlash, Percent); if (lookahead != null) { Expr.BOp bop; switch (lookahead.kind) { case Star: bop = Expr.BOp.MUL; break; case RightSlash: bop = Expr.BOp.DIV; break; case Percent: bop = Expr.BOp.REM; break; default: throw new RuntimeException("deadcode"); // dead-code } Expr rhs = parseAccessExpression(wf, scope, terminated); return new Expr.BinOp(bop, lhs, rhs, sourceAttr(start, index - 1)); } return lhs; } /** * Parse an <i>access expression</i>, which has the form: * * <pre> * AccessExpr::= PrimaryExpr * | AccessExpr '[' AdditiveExpr ']' * | AccessExpr '[' AdditiveExpr ".." AdditiveExpr ']' * | AccessExpr '.' Identifier * | AccessExpr '.' Identifier '(' [ Expr (',' Expr)* ] ')' * | AccessExpr "->" Identifier * </pre> * * <p> * Access expressions are challenging for several reasons. First, they are * <i>left-recursive</i>, making them more difficult to parse correctly. * Secondly, there are several different forms above and, of these, some * generate multiple AST nodes as well (see below). * </p> * * <p> * This parser attempts to construct the most accurate AST possible and this * requires disambiguating otherwise identical forms. For example, an * expression of the form "aaa.bbb.ccc" can correspond to either a field * access, or a constant expression (e.g. with a package/module specifier). * Likewise, an expression of the form "aaa.bbb.ccc()" can correspond to an * indirect function/method call, or a direct function/method call with a * package/module specifier. To disambiguate these forms, the parser relies * on the fact any sequence of field-accesses must begin with a local * variable. * </p> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseAccessExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; Expr lhs = parseTermExpression(wf, scope, terminated); Token token; while ((token = tryAndMatchOnLine(LeftSquare)) != null || (token = tryAndMatch(terminated, Dot, MinusGreater)) != null) { switch (token.kind) { case LeftSquare: // NOTE: expression guaranteed to be terminated by ']'. Expr rhs = parseAdditiveExpression(wf, scope, true); // This is a plain old array access expression match(RightSquare); lhs = new Expr.IndexOf(lhs, rhs, sourceAttr(start, index - 1)); break; case MinusGreater: lhs = new Expr.Dereference(lhs, sourceAttr(start, index - 1)); // Fall through case Dot: // At this point, we could have a field access, a package access // or a method/function invocation. Therefore, we start by // parsing the field access and then check whether or not its an // invocation. String name = match(Identifier).text; // This indicates we have either a direct or indirect access or // invocation. We can disambiguate between these two categories // by examining what we have parsed already. A direct access or // invocation requires a sequence of identifiers where the first // is not a declared variable name. Path.ID id = parsePossiblePathID(lhs, scope); // First we have to see if it is a method invocation. We can // have optional lifetime arguments in angle brackets. boolean isInvocation = false; List<String> lifetimeArguments = null; if (tryAndMatch(terminated, LeftBrace) != null) { isInvocation = true; } else if (lookaheadSequence(terminated, LeftAngle)) { // This one is a little tricky, as we need some lookahead // effort. We want to see whether it is a method invocation // with lifetime arguments. But "Identifier < ..." can also be a // boolean expression! int oldindex = index; match(LeftAngle); Token lifetime = tryAndMatch(terminated, RightAngle, Identifier, This, Star); if (lifetime != null && (lifetime.kind != Identifier || scope.isLifetime(lifetime.text))) { // then it's definitely a lifetime isInvocation = true; index--; // don't forget the first argument! lifetimeArguments = parseLifetimeArguments(wf, scope); match(LeftBrace); } else { index = oldindex; // backtrack } } if (isInvocation) { // This indicates a direct or indirect invocation. First, // parse arguments to invocation ArrayList<Expr> arguments = parseInvocationArguments(wf, scope); // Second, determine what kind of invocation we have. if (id == null) { // This indicates we have an indirect invocation lhs = new Expr.FieldAccess(lhs, name, sourceAttr(start, index - 1)); lhs = new Expr.AbstractIndirectInvoke(lhs, arguments, lifetimeArguments, sourceAttr(start, index - 1)); } else { // This indicates we have an direct invocation lhs = new Expr.AbstractInvoke(name, id, arguments, lifetimeArguments, sourceAttr(start, index - 1)); } } else if (id != null) { // Must be a qualified constant access lhs = new Expr.ConstantAccess(name, id, sourceAttr(start, index - 1)); } else { // Must be a plain old field access. lhs = new Expr.FieldAccess(lhs, name, sourceAttr(start, index - 1)); } } } return lhs; } /** * Attempt to parse a possible module identifier. This will reflect a true * module identifier only if the root variable is not in the given * environment. * * @param src * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @return */ private Path.ID parsePossiblePathID(Expr src, EnclosingScope scope) { if (src instanceof Expr.LocalVariable) { // this is a local variable, indicating that the we did not have // a module identifier. return null; } else if (src instanceof Expr.ConstantAccess) { Expr.ConstantAccess ca = (Expr.ConstantAccess) src; return Trie.ROOT.append(ca.name); } else if (src instanceof Expr.FieldAccess) { Expr.FieldAccess ada = (Expr.FieldAccess) src; Path.ID id = parsePossiblePathID(ada.src, scope); if (id != null) { return id.append(ada.name); } else { return null; } } else { return null; } } /** * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseTermExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { checkNotEof(); int start = index; Token token = tokens.get(index); switch (token.kind) { case LeftBrace: return parseBracketedExpression(wf, scope, terminated); case New: case This: return parseNewExpression(wf, scope, terminated); case Identifier: match(Identifier); if (tryAndMatch(terminated, LeftBrace) != null) { return parseInvokeExpression(wf, scope, start, token, terminated, null); } else if (lookaheadSequence(terminated, Colon, New)) { // Identifier is lifetime name in "new" expression index = start; return parseNewExpression(wf, scope, terminated); } else if (lookaheadSequence(terminated, LeftAngle)) { // This one is a little tricky, as we need some lookahead // effort. We want to see whether it is a method invocation with // lifetime arguments. But "Identifier < ..." can also be a // boolean expression! int oldindex = index; match(LeftAngle); Token lifetime = tryAndMatch(terminated, RightAngle, Identifier, This, Star); if (lifetime != null && (lifetime.kind != Identifier // then // it's // definitely // a // lifetime || scope.isLifetime(lifetime.text))) { index--; // don't forget the first argument! List<String> lifetimeArguments = parseLifetimeArguments(wf, scope); match(LeftBrace); return parseInvokeExpression(wf, scope, start, token, terminated, lifetimeArguments); } else { index = oldindex; // backtrack } } else if (lookaheadSequence(terminated, LeftCurly)) { // This indicates a named record initialiser which consists of a // name followed by a record initialiser. return parseRecordInitialiser(token.text, wf, scope, terminated); } // no else if, in case the former one didn't return if (scope.isVariable(token.text)) { // Signals a local variable access return new Expr.LocalVariable(token.text, sourceAttr(start, index - 1)); } else if (scope.isFieldAlias(token.text)) { // Signals a field alias Expr anon = new Expr.LocalVariable("$", sourceAttr(start, index - 1)); return new Expr.FieldAccess(anon, token.text, sourceAttr(start, index - 1)); } else { // Otherwise, this must be a constant access of some kind. // Observe that, at this point, we cannot determine whether or // not this is a constant-access or a package-access which marks // the beginning of a constant-access. return new Expr.ConstantAccess(token.text, null, sourceAttr(start, index - 1)); } case Null: return new Expr.Constant(wyil.lang.Constant.Null, sourceAttr(start, index++)); case True: return new Expr.Constant(Constant.True, sourceAttr(start, index++)); case False: return new Expr.Constant(Constant.False, sourceAttr(start, index++)); case ByteValue: { byte val = parseByte(token); return new Expr.Constant(new Constant.Byte(val), sourceAttr(start, index++)); } case CharValue: { BigInteger c = parseCharacter(token.text); return new Expr.Constant(new Constant.Integer(c), sourceAttr(start, index++)); } case IntValue: { BigInteger val = new BigInteger(token.text); return new Expr.Constant(new Constant.Integer(val), sourceAttr(start, index++)); } case StringValue: { List<Constant> str = parseString(token.text); return new Expr.Constant(new Constant.Array(str), sourceAttr(start, index++)); } case Minus: return parseNegationExpression(wf, scope, terminated); case VerticalBar: return parseLengthOfExpression(wf, scope, terminated); case LeftSquare: return parseArrayInitialiserOrGeneratorExpression(wf, scope, terminated); case LeftCurly: return parseRecordInitialiser(null, wf, scope, terminated); case Shreak: return parseLogicalNotExpression(wf, scope, terminated); case Star: if (lookaheadSequence(terminated, Star, Colon, New)) { // Star is default lifetime return parseNewExpression(wf, scope, terminated); } return parseDereferenceExpression(wf, scope, terminated); case Tilde: return parseBitwiseComplementExpression(wf, scope, terminated); case Ampersand: return parseLambdaOrAddressExpression(wf, scope, terminated); } syntaxError("unrecognised term", token); return null; } /** * Parse an expression beginning with a left brace. This is either a cast or * bracketed expression: * * <pre> * BracketedExpr ::= '(' Type ')' Expr * | '(' Expr ')' * </pre> * * <p> * The challenge here is to disambiguate the two forms (which is similar to * the problem of disambiguating a variable declaration from e.g. an * assignment). Getting this right is actually quite tricky, and we need to * consider what permissible things can follow a cast and/or a bracketed * expression. To simplify things, we only consider up to the end of the * current line in determining whether this is a cast or not. That means * that the expression following a cast *must* reside on the same line as * the cast. * </p> * * <p> * A cast can be followed by the start of any valid expression. This * includes: identifiers (e.g. "(T) x"), braces of various kinds (e.g. "(T) * [1,2]" or "(T) (1,2)"), unary operators (e.g. "(T) !x", "(T) |xs|", etc). * A bracketed expression, on the other hand, can be followed by a binary * operator (e.g. "(e) + 1"), a left- or right-brace (e.g. "(1 + (x+1))" or * "(*f)(1)") or a newline. * </p> * <p> * Most of these are easy to disambiguate by the following rules: * </p> * <ul> * <li>If what follows is a binary operator (e.g. +, -, etc) then this is an * bracketed expression, not a cast.</li> * <li>If what follows is a right-brace then this is a bracketed expression, * not a cast.</li> * <li>Otherwise, this is a cast.</li> * </ul> * <p> * Unfortunately, there are two problematic casts: '-' and '('. In Java, the * problem of '-' is resolved carefully as follows: * </p> * * <pre> * CastExpr::= ( PrimitiveType Dimsopt ) UnaryExpression * | ( ReferenceType ) UnaryExpressionNotPlusMinus * </pre> * * See JLS 15.16 (Cast Expressions). This means that, in cases where we can * be certain we have a type, then a general expression may follow; * otherwise, only a restricted expression may follow. * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseBracketedExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(LeftBrace); // At this point, we must begin to disambiguate casts from general // bracketed expressions. In the case that what follows the left brace // is something which can only be a type, then clearly we have a cast. // However, in the other case, we may still have a cast since many types // cannot be clearly distinguished from expressions at this stage (e.g. // "(nat,nat)" could either be a tuple type (if "nat" is a type) or a // tuple expression (if "nat" is a variable or constant). SyntacticType t = parseDefiniteType(scope); if (t != null) { // At this point, it's looking likely that we have a cast. However, // it's not certain because of the potential for nested braces. For // example, consider "((char) x + y)". We'll parse the outermost // brace and what follows *must* be parsed as either a type, or // bracketed type. if (tryAndMatch(true, RightBrace) != null) { // Ok, finally, we are sure that it is definitely a cast. Expr e = parseExpression(wf, scope, terminated); return new Expr.Cast(t, e, sourceAttr(start, index - 1)); } } // We still may have either a cast or a bracketed expression, and we // cannot tell which yet. index = start; match(LeftBrace); Expr e = parseExpression(wf, scope, true); match(RightBrace); // Now check whether this must be an expression, or could still be a // cast. if (!mustParseAsExpr(e)) { // At this point, we may still have a cast. Therefore, we now // examine what follows to see whether this is a cast or bracketed // expression. See JavaDoc comments above for more on this. What we // do is first skip any whitespace, and then see what we've got. int next = skipLineSpace(index); if (next < tokens.size()) { Token lookahead = tokens.get(next); switch (lookahead.kind) { case Null: case True: case False: case ByteValue: case CharValue: case IntValue: case RealValue: case StringValue: case LeftSquare: case LeftCurly: // FIXME: there is a bug here when parsing a quantified // expression such as // // "all { i in 0 .. (|items| - 1) | items[i] < items[i + 1] // }" // // This is because the trailing vertical bar makes it look // like this is a cast. case LeftBrace: case VerticalBar: case Shreak: case Identifier: { // Ok, this must be cast so back tract and reparse // expression as a type. index = start; // backtrack SyntacticType type = parseType(scope); // Now, parse cast expression e = parseExpression(wf, scope, terminated); return new Expr.Cast(type, e, sourceAttr(start, index - 1)); } default: // default case, fall through and assume bracketed // expression } } } // Assume bracketed return e; } /** * Parse an array initialiser or generator expression, which is of the form: * * <pre> * ArrayExpr ::= '[' [ Expr (',' Expr)+ ] ']' * | '[' Expr ';' Expr ']' * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseArrayInitialiserOrGeneratorExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(LeftSquare); Expr expr = parseExpression(wf, scope, true); // Finally, disambiguate if (tryAndMatch(true, SemiColon) != null) { // this is an array generator index = start; return parseArrayGeneratorExpression(wf, scope, terminated); } else { // this is an array initialiser index = start; return parseArrayInitialiserExpression(wf, scope, terminated); } } /** * Parse an array initialiser expression, which is of the form: * * <pre> * ArrayInitialiserExpr ::= '[' [ Expr (',' Expr)+ ] ']' * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseArrayInitialiserExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(LeftSquare); ArrayList<Expr> exprs = new ArrayList<>(); boolean firstTime = true; do { if (!firstTime) { match(Comma); } firstTime = false; // NOTE: we require the following expression be a "non-tuple" // expression. That is, it cannot be composed using ',' unless // braces enclose the entire expression. This is because the outer // list constructor expression is used ',' to distinguish elements. // Also, expression is guaranteed to be terminated, either by ']' or // ','. exprs.add(parseExpression(wf, scope, true)); } while (eventuallyMatch(RightSquare) == null); return new Expr.ArrayInitialiser(exprs, sourceAttr(start, index - 1)); } /** * Parse an array generator expression, which is of the form: * * <pre> * ArrayGeneratorExpr ::= '[' Expr ';' Expr ']' * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseArrayGeneratorExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(LeftSquare); Expr element = parseExpression(wf, scope, true); match(SemiColon); Expr count = parseExpression(wf, scope, true); match(RightSquare); return new Expr.ArrayGenerator(element, count, sourceAttr(start, index - 1)); } /** * Parse a record initialiser, which is of the form: * * <pre> * RecordExpr ::= '{' Identifier ':' Expr (',' Identifier ':' Expr)* '}' * </pre> * * During parsing, we additionally check that each identifier is unique; * otherwise, an error is reported. * * @param name * An optional name component for the record initialiser. If * null, then this is an anonymous record initialiser. Otherwise, * it is a named record initialiser. * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseRecordInitialiser(String name, WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(LeftCurly); HashSet<String> keys = new HashSet<>(); HashMap<String, Expr> exprs = new HashMap<>(); boolean firstTime = true; while (eventuallyMatch(RightCurly) == null) { if (!firstTime) { match(Comma); } firstTime = false; // Parse field name being constructed Token n = match(Identifier); // Check field name is unique if (keys.contains(n.text)) { syntaxError("duplicate record key", n); } match(Colon); // Parse expression being assigned to field // NOTE: we require the following expression be a "non-tuple" // expression. That is, it cannot be composed using ',' unless // braces enclose the entire expression. This is because the outer // record constructor expression is used ',' to distinguish fields. // Also, expression is guaranteed to be terminated, either by '}' or // ','. Expr e = parseExpression(wf, scope, true); exprs.put(n.text, e); keys.add(n.text); } return new Expr.Record(name, exprs, sourceAttr(start, index - 1)); } /** * Parse a new expression, which is of the form: * * <pre> * TermExpr::= ... * | "new" Expr * | Lifetime ":" "new" Identifier Expr * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseNewExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; // try to match a lifetime String lifetime; Token lifetimeIdentifier = tryAndMatch(terminated, Identifier, This, Star); if (lifetimeIdentifier != null) { scope.mustBeLifetime(lifetimeIdentifier); lifetime = lifetimeIdentifier.text; match(Colon); } else { lifetime = "*"; } match(New); Expr e = parseExpression(wf, scope, terminated); return new Expr.New(e, lifetime, sourceAttr(start, index - 1)); } /** * Parse a length of expression, which is of the form: * * <pre> * TermExpr::= ... * | '|' Expr '|' * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseLengthOfExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(VerticalBar); // We have to parse an Append Expression here, which is the most general // form of expression that can generate a collection of some kind. All // expressions higher up (e.g. logical expressions) cannot generate // collections. Furthermore, the bitwise or expression could lead to // ambiguity and, hence, we bypass that an consider append expressions // only. However, the expression is guaranteed to be terminated by '|'. Expr e = parseShiftExpression(wf, scope, true); match(VerticalBar); return new Expr.UnOp(Expr.UOp.ARRAYLENGTH, e, sourceAttr(start, index - 1)); } /** * Parse a negation expression, which is of the form: * * <pre> * TermExpr::= ... * | '-' Expr * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseNegationExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(Minus); Expr e = parseAccessExpression(wf, scope, terminated); return new Expr.UnOp(Expr.UOp.NEG, e, sourceAttr(start, index - 1)); } /** * Parse an invocation expression, which has the form: * * <pre> * InvokeExpr::= Identifier '(' [ Expr (',' Expr)* ] ')' * </pre> * * Observe that this when this function is called, we're assuming that the * identifier and opening brace has already been matched. * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseInvokeExpression(WhileyFile wf, EnclosingScope scope, int start, Token name, boolean terminated, List<String> lifetimeArguments) { // First, parse the arguments to this invocation. ArrayList<Expr> args = parseInvocationArguments(wf, scope); // Second, determine what kind of invocation we have. If the name of the // method is a local variable, then it must be an indirect invocation on // this variable. if (scope.isVariable(name.text)) { // indirect invocation on local variable Expr.LocalVariable lv = new Expr.LocalVariable(name.text, sourceAttr(start, start)); return new Expr.AbstractIndirectInvoke(lv, args, lifetimeArguments, sourceAttr(start, index - 1)); } else { // unqualified direct invocation return new Expr.AbstractInvoke(name.text, null, args, lifetimeArguments, sourceAttr(start, index - 1)); } } /** * Parse a sequence of arguments separated by commas that ends in a * right-brace: * * <pre> * ArgumentList ::= [ Expr (',' Expr)* ] ')' * </pre> * * Note, when this function is called we're assuming the left brace was * already parsed. * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private ArrayList<Expr> parseInvocationArguments(WhileyFile wf, EnclosingScope scope) { boolean firstTime = true; ArrayList<Expr> args = new ArrayList<>(); while (eventuallyMatch(RightBrace) == null) { if (!firstTime) { match(Comma); } else { firstTime = false; } // NOTE: we require the following expression be a "non-tuple" // expression. That is, it cannot be composed using ',' unless // braces enclose the entire expression. This is because the outer // invocation expression is used ',' to distinguish arguments. // However, expression is guaranteed to be terminated either by ')' // or by ','. Expr e = parseExpression(wf, scope, true); args.add(e); } return args; } /** * Parse a sequence of lifetime arguments separated by commas that ends in a * right-angle: * * <pre> * LifetimeArguments ::= [ Lifetime (',' Lifetime)* ] '>' * </pre> * * Note, when this function is called we're assuming the left angle was * already parsed. * * @param wf * @param scope * @return */ private ArrayList<String> parseLifetimeArguments(WhileyFile wf, EnclosingScope scope) { boolean firstTime = true; ArrayList<String> lifetimeArgs = new ArrayList<>(); while (eventuallyMatch(RightAngle) == null) { if (!firstTime) { match(Comma); } else { firstTime = false; } // termindated by '>' String lifetime = parseLifetime(scope, true); lifetimeArgs.add(lifetime); } return lifetimeArgs; } /** * Parse a logical not expression, which has the form: * * <pre> * TermExpr::= ... * | '!' Expr * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseLogicalNotExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(Shreak); // Note: cannot parse unit expression here, because that messes up the // precedence. For example, !result ==> other should be parsed as // (!result) ==> other, not !(result ==> other). Expr expression = parseConditionExpression(wf, scope, terminated); return new Expr.UnOp(Expr.UOp.NOT, expression, sourceAttr(start, index - 1)); } /** * Parse a dereference expression, which has the form: * * <pre> * TermExpr::= ... * | '*' Expr * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * * @return */ private Expr parseDereferenceExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(Star); Expr expression = parseTermExpression(wf, scope, terminated); return new Expr.Dereference(expression, sourceAttr(start, index - 1)); } /** * Parse a lambda or address expression, which have the form: * * <pre> * TermExpr::= ... * | '&' [ '[' [ Lifetime (',' Lifetime )* ] ']' ] * [ '<' [ Identifier (',' Identifier)* ] '>' ] * '(' [Type Identifier (',' Type Identifier)*] '->' Expr ')' * | '&' Identifier [ '(' Type (',' Type)* ')'] * </pre> * * Disambiguating these two forms is relatively straightforward, and we just * look to see what follows the '&'. * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseLambdaOrAddressExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(Ampersand); if (tryAndMatch(terminated, LeftBrace, LeftSquare, LeftAngle) != null) { index = start; // backtrack return parseLambdaExpression(wf, scope, terminated); } else { index = start; // backtrack return parseAddressExpression(wf, scope, terminated); } } /** * Parse a lambda expression, which has the form: * * <pre> * TermExpr::= ... * | '&' [ '[' [ Lifetime (',' Lifetime )* ] ']' ] * [ '<' [ Identifier (',' Identifier)* ] '>' ] * '(' [Type Identifier (',' Type Identifier)*] '->' Expr ')' * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseLambdaExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(Ampersand); // First parse the context lifetimes with the original scope Set<String> contextLifetimes = parseOptionalContextLifetimes(scope); // Now we create a new scope for this lambda expression. // It keeps all variables but only the given context lifetimes. // But it keeps all unavailable names, i.e. unaccessible lifetimes // from the outer scope cannot be redeclared. scope = scope.newEnclosingScope(contextLifetimes); // Parse the optional lifetime parameters List<String> lifetimeParameters = parseOptionalLifetimeParameters(scope); match(LeftBrace); ArrayList<WhileyFile.Parameter> parameters = new ArrayList<>(); boolean firstTime = true; while (eventuallyMatch(MinusGreater) == null) { int p_start = index; if (!firstTime) { match(Comma); } firstTime = false; SyntacticType type = parseType(scope); Token id = match(Identifier); scope.declareVariable(id); parameters.add(wf.new Parameter(type, id.text, sourceAttr(p_start, index - 1))); } // NOTE: expression guanrateed to be terminated by ')' Expr body = parseExpression(wf, scope, true); match(RightBrace); return new Expr.Lambda(parameters, contextLifetimes, lifetimeParameters, body, sourceAttr(start, index - 1)); } /** * Parse an address expression, which has the form: * * <pre> * TermExpr::= ... * | '&' Identifier [ '(' Type (',' Type)* ')'] * </pre> * * @param wf * The enclosing WhileyFile being constructed. This is necessary * to construct some nested declarations (e.g. parameters for * lambdas) * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseAddressExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(Ampersand); Token id = match(Identifier); // Check whether or not parameters are supplied if (tryAndMatch(terminated, LeftBrace) != null) { // Yes, parameters are supplied! ArrayList<SyntacticType> parameters = new ArrayList<>(); boolean firstTime = true; while (eventuallyMatch(RightBrace) == null) { int p_start = index; if (!firstTime) { match(Comma); } firstTime = false; SyntacticType type = parseType(scope); parameters.add(type); } return new Expr.AbstractFunctionOrMethod(id.text, parameters, null, sourceAttr(start, index - 1)); } else { // No, parameters are not supplied. return new Expr.AbstractFunctionOrMethod(id.text, null, null, sourceAttr(start, index - 1)); } } /** * Parse a bitwise complement expression, which has the form: * * <pre> * TermExpr::= ... * | '~' Expr// bitwise complement * </pre> * * @param scope * The enclosing scope for this statement, which determines the * set of visible (i.e. declared) variables and also the current * indentation level. * @param terminated * This indicates that the expression is known to be terminated * (or not). An expression that's known to be terminated is one * which is guaranteed to be followed by something. This is * important because it means that we can ignore any newline * characters encountered in parsing this expression, and that * we'll never overrun the end of the expression (i.e. because * there's guaranteed to be something which terminates this * expression). A classic situation where terminated is true is * when parsing an expression surrounded in braces. In such case, * we know the right-brace will always terminate this expression. * * @return */ private Expr parseBitwiseComplementExpression(WhileyFile wf, EnclosingScope scope, boolean terminated) { int start = index; match(Tilde); Expr expression = parseExpression(wf, scope, terminated); return new Expr.UnOp(Expr.UOp.INVERT, expression, sourceAttr(start, index - 1)); } /** * Attempt to parse something which maybe a type, or an expression. The * semantics of this function dictate that it returns an instanceof * SyntacticType *only* if what it finds *cannot* be parsed as an * expression, but can be parsed as a type. Otherwise, the state is left * unchanged. * * @return An instance of SyntacticType or null. */ public SyntacticType parseDefiniteType(EnclosingScope scope) { int start = index; // backtrack point try { SyntacticType type = parseType(scope); if (mustParseAsType(type)) { return type; } } catch (SyntaxError e) { } index = start; // backtrack return null; } /** * <p> * Determine whether or not the given type can be parsed as an expression. * In many cases, a type can (e.g. <code>{x}</code> is both a valid type and * expression). However, some types are not also expressions (e.g. * <code>int</code>, <code>{int f}</code>, <code>&int</code>, etc). * </p> * * <p> * This function *must* return false if what the given type could not be * parsed as an expression. However, if what it can be parsed as an * expression, then this function must return false (even if we will * eventually treat this as a type). This function is called from either the * beginning of a statement (i.e. to disambiguate variable declarations), or * after matching a left brace (i.e. to disambiguate casts). * </p> * * @param index * Position in the token stream to begin looking from. * @return */ private boolean mustParseAsType(SyntacticType type) { if (type instanceof SyntacticType.Primitive) { // All primitive types must be parsed as types, since their // identifiers are keywords. return true; } else if (type instanceof SyntacticType.Record) { // Record types must be parsed as types, since e.g. {int f} is not a // valid expression. return true; } else if (type instanceof SyntacticType.FunctionOrMethod) { // "function" and "method" are keywords, cannot parse as expression. return true; } else if (type instanceof SyntacticType.Intersection) { SyntacticType.Intersection tt = (SyntacticType.Intersection) type; boolean result = false; for (SyntacticType element : tt.bounds) { result |= mustParseAsType(element); } return result; } else if (type instanceof SyntacticType.Array) { return true; } else if (type instanceof SyntacticType.Negation) { SyntacticType.Negation tt = (SyntacticType.Negation) type; return mustParseAsType(tt.element); } else if (type instanceof SyntacticType.Nominal) { return false; // always can be an expression } else if (type instanceof SyntacticType.Reference) { SyntacticType.Reference tt = (SyntacticType.Reference) type; if (tt.lifetime.equals("this") || tt.lifetime.equals("*") && tt.lifetimeWasExplicit) { // &this and &* is not a valid expression because "this" is // keyword // &ident could also be an address expression return true; } return mustParseAsType(tt.element); } else if (type instanceof SyntacticType.Union) { SyntacticType.Union tt = (SyntacticType.Union) type; boolean result = false; for (SyntacticType element : tt.bounds) { result |= mustParseAsType(element); } return result; } else { // Error! throw new InternalFailure("unknown syntactic type encountered", entry, type); } } /** * <p> * Determine whether a given expression can *only* be parsed as an * expression, not as a type. This is necessary to check whether a given * unknown expression could be a cast or not. If it must be parsed as an * expression, then it clearly cannot be parsed as a type and, hence, this * is not a cast. * </p> * <p> * The reason that something must be parsed as an expression is because it * contains something which cannot be part of a type. For example, * <code>(*x)</code> could not form part of a cast because the dereference * operator is not permitted within a type. In contrast, * <code>(x.y.f)</code> could be a type if e.g. <code>x.y</code> is a fully * qualified file and <code>f</code> a named item within that. * </p> * * @param e * Expression to be checked. * @return */ private boolean mustParseAsExpr(Expr e) { if (e instanceof Expr.LocalVariable) { return true; } else if (e instanceof Expr.AbstractVariable) { return false; // unknown } else if (e instanceof Expr.ConstantAccess) { return false; } else if (e instanceof Expr.FieldAccess) { Expr.FieldAccess fa = (Expr.FieldAccess) e; return mustParseAsExpr(fa.src); } else if (e instanceof Expr.BinOp) { Expr.BinOp bop = (Expr.BinOp) e; switch (bop.op) { case BITWISEOR: case BITWISEAND: return mustParseAsExpr(bop.lhs) || mustParseAsExpr(bop.rhs); } return false; } else if (e instanceof Expr.UnOp) { Expr.UnOp uop = (Expr.UnOp) e; switch (uop.op) { case NOT: return mustParseAsExpr(uop.mhs); case ARRAYLENGTH: case INVERT: return true; default: return false; } } else if (e instanceof Expr.AbstractFunctionOrMethod) { return true; } else if (e instanceof Expr.AbstractInvoke) { return true; } else if (e instanceof Expr.AbstractIndirectInvoke) { return true; } else if (e instanceof Expr.Dereference) { return true; } else if (e instanceof Expr.Cast) { return true; } else if (e instanceof Expr.Constant) { return true; } else if (e instanceof Expr.Quantifier) { return true; } else if (e instanceof Expr.IndexOf) { return true; } else if (e instanceof Expr.Lambda) { return true; } else if (e instanceof Expr.ArrayInitialiser) { return true; } else if (e instanceof Expr.New) { return true; } else if (e instanceof Expr.Record) { return true; } else { throw new InternalFailure("unknown expression encountered", entry, e); } } /** * Parse a top-level type, which is of the form: * * <pre> * TupleType ::= Type (',' Type)* * </pre> * * @see wyc.lang.SyntacticType.Tuple * @return */ private SyntacticType parseType(EnclosingScope scope) { return parseUnionType(scope); } /** * Parse a union type, which is of the form: * * <pre> * UnionType ::= IntersectionType ('|' IntersectionType)* * </pre> * * @return */ private SyntacticType parseUnionType(EnclosingScope scope) { int start = index; SyntacticType t = parseIntersectionType(scope); // Now, attempt to look for union and/or intersection types if (tryAndMatch(true, VerticalBar) != null) { // This is a union type ArrayList types = new ArrayList<SyntacticType>(); types.add(t); do { types.add(parseIntersectionType(scope)); } while (tryAndMatch(true, VerticalBar) != null); return new SyntacticType.Union(types, sourceAttr(start, index - 1)); } else { return t; } } /** * Parse an intersection type, which is of the form: * * <pre> * IntersectionType ::= BaseType ('&' BaseType)* * </pre> * * @return */ private SyntacticType parseIntersectionType(EnclosingScope scope) { int start = index; SyntacticType t = parseArrayType(scope); // Now, attempt to look for union and/or intersection types if (tryAndMatch(true, Ampersand) != null) { // This is a union type ArrayList types = new ArrayList<SyntacticType>(); types.add(t); do { types.add(parseArrayType(scope)); } while (tryAndMatch(true, Ampersand) != null); return new SyntacticType.Intersection(types, sourceAttr(start, index - 1)); } else { return t; } } /** * Parse an array type, which is of the form: * * <pre> * ArrayType ::= Type '[' ']' * </pre> * * @return */ private SyntacticType parseArrayType(EnclosingScope scope) { int start = index; SyntacticType element = parseBaseType(scope); while (tryAndMatch(true, LeftSquare) != null) { match(RightSquare); element = new SyntacticType.Array(element, sourceAttr(start, index - 1)); } return element; } private SyntacticType parseBaseType(EnclosingScope scope) { checkNotEof(); int start = index; Token token = tokens.get(index); SyntacticType t; switch (token.kind) { case Void: return new SyntacticType.Void(sourceAttr(start, index++)); case Any: return new SyntacticType.Any(sourceAttr(start, index++)); case Null: return new SyntacticType.Null(sourceAttr(start, index++)); case Bool: return new SyntacticType.Bool(sourceAttr(start, index++)); case Byte: return new SyntacticType.Byte(sourceAttr(start, index++)); case Int: return new SyntacticType.Int(sourceAttr(start, index++)); case LeftBrace: return parseBracketedType(scope); case LeftCurly: return parseRecordType(scope); case Shreak: return parseNegationType(scope); case Ampersand: return parseReferenceType(scope); case Identifier: return parseNominalType(); case Function: return parseFunctionOrMethodType(true, scope); case Method: return parseFunctionOrMethodType(false, scope); default: syntaxError("unknown type encountered", token); return null; } } /** * Parse a negation type, which is of the form: * * <pre> * NegationType ::= '!' Type * </pre> * * @return */ private SyntacticType parseNegationType(EnclosingScope scope) { int start = index; match(Shreak); SyntacticType element = parseArrayType(scope); return new SyntacticType.Negation(element, sourceAttr(start, index - 1)); } /** * Parse a reference type, which is of the form: * * <pre> * ReferenceType ::= '&' Type * | '&' Lifetime ':' Type * Lifetime ::= Identifier | 'this' | '*' * </pre> * * @return */ private SyntacticType parseReferenceType(EnclosingScope scope) { int start = index; match(Ampersand); // Try to parse an annotated lifetime int backtrack = index; Token lifetimeIdentifier = tryAndMatch(true, Identifier, This, Star); if (lifetimeIdentifier != null) { // We cannot allow a newline after the colon, as it would // unintentionally match a return type that happens to be reference // type without lifetime annotation (return type in method signature // is always followed by colon and newline). if (tryAndMatch(true, Colon) != null && !isAtEOL()) { // Now we know that there is an annotated lifetime scope.mustBeLifetime(lifetimeIdentifier); SyntacticType element = parseArrayType(scope); return new SyntacticType.Reference(element, lifetimeIdentifier.text, true, sourceAttr(start, index - 1)); } } index = backtrack; SyntacticType element = parseArrayType(scope); return new SyntacticType.Reference(element, "*", false, sourceAttr(start, index - 1)); } /** * Parse a currently declared lifetime. * * @return the matched lifetime name */ private String parseLifetime(EnclosingScope scope, boolean terminated) { int next = terminated ? skipWhiteSpace(index) : skipLineSpace(index); if (next < tokens.size()) { Token t = tokens.get(next); if (t.kind == Identifier || t.kind == This || t.kind == Star) { index = next + 1; scope.mustBeLifetime(t); return t.text; } syntaxError("expectiong a lifetime identifier here", t); } syntaxError("unexpected end-of-file", tokens.get(next - 1)); throw new RuntimeException("deadcode"); // dead-code } /** * Parse a bracketed type, which is of the form: * * <pre> * BracketedType ::= '(' Type ')' * </pre> * * @return */ private SyntacticType parseBracketedType(EnclosingScope scope) { int start = index; match(LeftBrace); SyntacticType type = parseType(scope); match(RightBrace); return type; } /** * Parse a set, map or record type, which are of the form: * * <pre> * SetType ::= '{' Type '}' * MapType ::= '{' Type "=>" Type '}' * RecordType ::= '{' Type Identifier (',' Type Identifier)* [ ',' "..." ] '}' * </pre> * * Disambiguating these three forms is relatively straightforward as all * three must be terminated by a right curly brace. Therefore, after parsing * the first Type, we simply check what follows. One complication is the * potential for "mixed types" where the field name and type and intertwined * (e.g. function read()->[byte]). * * @return */ private SyntacticType parseRecordType(EnclosingScope scope) { int start = index; match(LeftCurly); HashMap<String, SyntacticType> types = new HashMap<>(); Pair<SyntacticType, Token> p = parseMixedType(scope); types.put(p.second().text, p.first()); // Now, we continue to parse any remaining fields. boolean isOpen = false; while (eventuallyMatch(RightCurly) == null) { match(Comma); if (tryAndMatch(true, DotDotDot) != null) { // this signals an "open" record type match(RightCurly); isOpen = true; break; } else { p = parseMixedType(scope); Token id = p.second(); if (types.containsKey(id.text)) { syntaxError("duplicate record key", id); } types.put(id.text, p.first()); } } // Done return new SyntacticType.Record(isOpen, types, sourceAttr(start, index - 1)); } /** * Parse a nominal type, which is of the form: * * <pre> * NominalType ::= Identifier ('.' Identifier)* * </pre> * * @see wyc.lang.SyntacticType.Nominal * @return */ private SyntacticType parseNominalType() { int start = index; ArrayList<String> names = new ArrayList<>(); // Match one or more identifiers separated by dots do { names.add(match(Identifier).text); } while (tryAndMatch(true, Dot) != null); return new SyntacticType.Nominal(names, sourceAttr(start, index - 1)); } /** * Parse a function or method type, which is of the form: * * <pre> * FunctionType ::= "function" [Type (',' Type)* ] "->" Type * MethodType ::= "method" [Type (',' Type)* ] "->" Type * </pre> * * At the moment, it is required that parameters for a function or method * type are enclosed in braces. In principle, we would like to relax this. * However, this is difficult to make work because there is not way to * invoke a function or method without using braces. * * @return */ private SyntacticType parseFunctionOrMethodType(boolean isFunction, EnclosingScope scope) { int start = index; List<String> lifetimeParameters; Set<String> contextLifetimes; if (isFunction) { match(Function); contextLifetimes = Collections.emptySet(); lifetimeParameters = Collections.emptyList(); } else { match(Method); contextLifetimes = parseOptionalContextLifetimes(scope); scope = scope.newEnclosingScope(); lifetimeParameters = parseOptionalLifetimeParameters(scope); } // First, parse the parameter type(s). List<SyntacticType> paramTypes = parseParameterTypes(scope); List<SyntacticType> returnTypes = Collections.emptyList(); // Second, parse the right arrow. if (isFunction) { // Functions require a return type (since otherwise they are just // nops) match(MinusGreater); // Third, parse the return types. returnTypes = parseOptionalParameterTypes(scope); } else if (tryAndMatch(true, MinusGreater) != null) { // Methods have an optional return type // Third, parse the return type returnTypes = parseOptionalParameterTypes(scope); } // Done if (isFunction) { return new SyntacticType.Function(returnTypes, paramTypes, sourceAttr(start, index - 1)); } else { return new SyntacticType.Method(returnTypes, paramTypes, contextLifetimes, lifetimeParameters, sourceAttr(start, index - 1)); } } /** * Parse a potentially mixed-type, which is of the form: * * <pre> * MixedType ::= Type Identifier * | "function" Type Identifier '(' [Type (',' Type)* ] ')' "->" Type * | "method" Type Identifier '(' [Type (',' Type)* ] ')' "->" Type * </pre> * * @return */ private Pair<SyntacticType, Token> parseMixedType(EnclosingScope scope) { Token lookahead; int start = index; if ((lookahead = tryAndMatch(true, Function, Method)) != null) { // At this point, we *might* have a mixed function / method type // definition. To disambiguate, we need to see whether an identifier // follows or not. // Similar to normal method declarations, the lifetime parameters // go before the method name. We do not allow to have context // lifetimes // for mixed method types. List<String> lifetimeParameters = Collections.emptyList(); if (lookahead.kind == Method && tryAndMatch(true, LeftAngle) != null) { // mixed method type with lifetime parameters scope = scope.newEnclosingScope(); lifetimeParameters = parseLifetimeParameters(scope); } // Now try to parse the identifier Token id = tryAndMatch(true, Identifier); if (id != null) { // Yes, we have found a mixed function / method type definition. // Therefore, we continue to pass the remaining type parameters. List<SyntacticType> paramTypes = parseParameterTypes(scope); List<SyntacticType> returnTypes = Collections.emptyList(); if (lookahead.kind == Function) { // Functions require a return type (since otherwise they are // just nops) match(MinusGreater); // Third, parse the return type returnTypes = parseOptionalParameterTypes(scope); } else if (tryAndMatch(true, MinusGreater) != null) { // Third, parse the (optional) return type. Observe that // this is forced to be a // unit type. This means that any tuple return types must be // in braces. The reason for this is that a trailing comma // may be part of an enclosing record type and we must // disambiguate // this. returnTypes = parseOptionalParameterTypes(scope); } // Done SyntacticType type; if (lookahead.kind == Token.Kind.Function) { type = new SyntacticType.Function(returnTypes, paramTypes, sourceAttr(start, index - 1)); } else { type = new SyntacticType.Method(returnTypes, paramTypes, Collections.<String>emptySet(), lifetimeParameters, sourceAttr(start, index - 1)); } return new Pair<>(type, id); } else { // In this case, we failed to match a mixed type. Therefore, we // backtrack and parse as two separate items (i.e. type // identifier). index = start; // backtrack } } // This is the normal case, where we expect an identifier to follow the // type. SyntacticType type = parseType(scope); Token id = match(Identifier); return new Pair<>(type, id); } public List<SyntacticType> parseOptionalParameterTypes(EnclosingScope scope) { int next = skipWhiteSpace(index); if (next < tokens.size() && tokens.get(next).kind == LeftBrace) { return parseParameterTypes(scope); } else { SyntacticType t = parseType(scope); ArrayList<SyntacticType> rs = new ArrayList<>(); rs.add(t); return rs; } } public List<SyntacticType> parseParameterTypes(EnclosingScope scope) { ArrayList<SyntacticType> paramTypes = new ArrayList<>(); match(LeftBrace); boolean firstTime = true; while (eventuallyMatch(RightBrace) == null) { if (!firstTime) { match(Comma); } firstTime = false; paramTypes.add(parseType(scope)); } return paramTypes; } /** * Attention: Enters the lifetime names to the passed scope! * * @param scope * @return */ public List<String> parseOptionalLifetimeParameters(EnclosingScope scope) { if (tryAndMatch(true, LeftAngle) != null && tryAndMatch(true, RightAngle) == null) { // The if above skips an empty list of identifiers "<>"! return parseLifetimeParameters(scope); } return Collections.emptyList(); } /** * Attention: Enters the lifetime names to the passed scope! Assumes that * '<' has already been matched. * * @param scope * @return */ private List<String> parseLifetimeParameters(EnclosingScope scope) { List<String> lifetimeParameters = new ArrayList<>(); do { Token lifetimeIdentifier = match(Identifier); scope.declareLifetime(lifetimeIdentifier); lifetimeParameters.add(lifetimeIdentifier.text); } while (tryAndMatch(true, Comma) != null); match(RightAngle); return lifetimeParameters; } /** * @param scope * @return */ public Set<String> parseOptionalContextLifetimes(EnclosingScope scope) { if (tryAndMatch(true, LeftSquare) != null && tryAndMatch(true, RightSquare) == null) { // The if above skips an empty list of identifiers "[]"! Set<String> contextLifetimes = new HashSet<>(); do { contextLifetimes.add(parseLifetime(scope, true)); } while (tryAndMatch(true, Comma) != null); match(RightSquare); return contextLifetimes; } return Collections.emptySet(); } public boolean mustParseAsMixedType() { int start = index; if (tryAndMatch(true, Function, Method) != null && tryAndMatch(true, Identifier) != null) { // Yes, this is a mixed type index = start; return true; } else { // No, this is not a mixed type index = start; return false; } } /** * Match a given token kind, whilst moving passed any whitespace encountered * inbetween. In the case that meet the end of the stream, or we don't match * the expected token, then an error is thrown. * * @param kind * @return */ private Token match(Token.Kind kind) { checkNotEof(); Token token = tokens.get(index++); if (token.kind != kind) { syntaxError("expecting \"" + kind + "\" here", token); } return token; } /** * Match a given sequence of tokens, whilst moving passed any whitespace * encountered inbetween. In the case that meet the end of the stream, or we * don't match the expected tokens in the expected order, then an error is * thrown. * * @param operator * @return */ private Token[] match(Token.Kind... kinds) { Token[] result = new Token[kinds.length]; for (int i = 0; i != result.length; ++i) { checkNotEof(); Token token = tokens.get(index++); if (token.kind == kinds[i]) { result[i] = token; } else { syntaxError("Expected \"" + kinds[i] + "\" here", token); } } return result; } /** * Attempt to match a given kind of token with the view that it must * *eventually* be matched. This differs from <code>tryAndMatch()</code> * because it calls <code>checkNotEof()</code>. Thus, it is guaranteed to * skip any whitespace encountered in between. This is safe because we know * there is a terminating token still to come. * * @param kind * @return */ private Token eventuallyMatch(Token.Kind kind) { checkNotEof(); Token token = tokens.get(index); if (token.kind != kind) { return null; } else { index = index + 1; return token; } } /** * Attempt to match a given token(s) at a given level of indent, whilst * ignoring any whitespace in between. Note that, in the case it fails to * match, then the index will be unchanged. This latter point is important, * otherwise we could accidentally gobble up some important indentation. If * more than one kind is provided then this will try to match any of them. * * @param terminated * Indicates whether or not this function should be concerned * with new lines. The terminated flag indicates whether or not * the current construct being parsed is known to be terminated. * If so, then we don't need to worry about newlines and can * greedily consume them (i.e. since we'll eventually run into * the terminating symbol). * @param indent * The indentation level to try and match the tokens at. * @param kinds * * @return */ private Token tryAndMatchAtIndent(boolean terminated, Indent indent, Token.Kind... kinds) { int start = index; Indent r = getIndent(); if (r != null && r.equivalent(indent)) { Token t = tryAndMatch(terminated, kinds); if (t != null) { return r; } } // backtrack in all failing cases. index = start; return null; } /** * Attempt to match a given token(s), whilst ignoring any whitespace in * between. Note that, in the case it fails to match, then the index will be * unchanged. This latter point is important, otherwise we could * accidentally gobble up some important indentation. If more than one kind * is provided then this will try to match any of them. * * @param terminated * Indicates whether or not this function should be concerned * with new lines. The terminated flag indicates whether or not * the current construct being parsed is known to be terminated. * If so, then we don't need to worry about newlines and can * greedily consume them (i.e. since we'll eventually run into * the terminating symbol). * @param kinds * * @return */ private Token tryAndMatch(boolean terminated, Token.Kind... kinds) { // If the construct being parsed is know to be terminated, then we can // skip all whitespace. Otherwise, we can't skip newlines as these are // significant. int next = terminated ? skipWhiteSpace(index) : skipLineSpace(index); if (next < tokens.size()) { Token t = tokens.get(next); for (int i = 0; i != kinds.length; ++i) { if (t.kind == kinds[i]) { index = next + 1; return t; } } } return null; } /** * Attempt to match a given sequence of tokens in the given order, whilst * ignoring any whitespace in between. Note that, in any case, the index * will be unchanged! * * @param terminated * Indicates whether or not this function should be concerned * with new lines. The terminated flag indicates whether or not * the current construct being parsed is known to be terminated. * If so, then we don't need to worry about newlines and can * greedily consume them (i.e. since we'll eventually run into * the terminating symbol). * @param kinds * * @return whether the sequence matches */ private boolean lookaheadSequence(boolean terminated, Token.Kind... kinds) { int next = index; for (Token.Kind k : kinds) { next = terminated ? skipWhiteSpace(next) : skipLineSpace(next); if (next >= tokens.size() || tokens.get(next++).kind != k) { return false; } } return true; } /** * Check whether the current index is, after skipping all line spaces, at * the end of a line. This method does not change the state! * * @return whether index is at end of line */ private boolean isAtEOL() { int next = skipLineSpace(index); return next >= tokens.size() || tokens.get(next).kind == NewLine; } /** * Attempt to match a given token on the *same* line, whilst ignoring any * whitespace in between. Note that, in the case it fails to match, then the * index will be unchanged. This latter point is important, otherwise we * could accidentally gobble up some important indentation. * * @param kind * @return */ private Token tryAndMatchOnLine(Token.Kind kind) { int next = skipLineSpace(index); if (next < tokens.size()) { Token t = tokens.get(next); if (t.kind == kind) { index = next + 1; return t; } } return null; } /** * Match a the end of a line. This is required to signal, for example, the * end of the current statement. */ private void matchEndLine() { // First, parse all whitespace characters except for new lines index = skipLineSpace(index); // Second, check whether we've reached the end-of-file (as signaled by // running out of tokens), or we've encountered some token which not a // newline. if (index >= tokens.size()) { return; // EOF } else if (tokens.get(index).kind != NewLine) { syntaxError("expected end-of-line", tokens.get(index)); } else { index = index + 1; } } /** * Check that the End-Of-File has not been reached. This method should be * called from contexts where we are expecting something to follow. */ private void checkNotEof() { skipWhiteSpace(); if (index >= tokens.size()) { if (index > 0) { syntaxError("unexpected end-of-file", tokens.get(index - 1)); } else { // I believe this is actually dead-code, since checkNotEof() // won't be called before at least one token is matched. throw new SyntaxError("unexpected end-of-file", entry, null); } } } /** * Skip over any whitespace characters. */ private void skipWhiteSpace() { index = skipWhiteSpace(index); } /** * Skip over any whitespace characters, starting from a given index and * returning the first index passed any whitespace encountered. */ private int skipWhiteSpace(int index) { while (index < tokens.size() && isWhiteSpace(tokens.get(index))) { index++; } return index; } /** * Skip over any whitespace characters that are permitted on a given line * (i.e. all except newlines), starting from a given index and returning the * first index passed any whitespace encountered. */ private int skipLineSpace(int index) { while (index < tokens.size() && isLineSpace(tokens.get(index))) { index++; } return index; } /** * Skip over any empty lines. That is lines which contain only whitespace * and comments. */ private void skipEmptyLines() { int tmp = index; do { tmp = skipLineSpace(tmp); if (tmp < tokens.size() && tokens.get(tmp).kind != Token.Kind.NewLine) { return; // done } else if (tmp >= tokens.size()) { index = tmp; return; // end-of-file reached } // otherwise, skip newline and continue tmp = tmp + 1; index = tmp; } while (true); // deadcode } /** * Define what is considered to be whitespace. * * @param token * @return */ private boolean isWhiteSpace(Token token) { return token.kind == Token.Kind.NewLine || isLineSpace(token); } /** * Define what is considered to be linespace. * * @param token * @return */ private boolean isLineSpace(Token token) { return token.kind == Token.Kind.Indent || token.kind == Token.Kind.LineComment || token.kind == Token.Kind.BlockComment; } /** * Parse a character from a string of the form 'c' or '\c'. * * @param input * @return */ private BigInteger parseCharacter(String input) { int pos = 1; char c = input.charAt(pos++); if (c == '\\') { // escape code switch (input.charAt(pos++)) { case 'b': c = '\b'; break; case 't': c = '\t'; break; case 'n': c = '\n'; break; case 'f': c = '\f'; break; case 'r': c = '\r'; break; case '"': c = '\"'; break; case '\'': c = '\''; break; case '\\': c = '\\'; break; default: throw new RuntimeException("unrecognised escape character"); } } return BigInteger.valueOf(c); } /** * Parse a string constant whilst interpreting all escape characters. * * @param v * @return */ protected List<Constant> parseString(String v) { /* * Parsing a string requires several steps to be taken. First, we need * to strip quotes from the ends of the string. */ v = v.substring(1, v.length() - 1); ArrayList<Constant> result = new ArrayList<>(); // Second, step through the string and replace escaped characters for (int i = 0; i < v.length(); i++) { if (v.charAt(i) == '\\') { if (v.length() <= i + 1) { throw new RuntimeException("unexpected end-of-string"); } else { char replace = 0; int len = 2; switch (v.charAt(i + 1)) { case 'b': replace = '\b'; break; case 't': replace = '\t'; break; case 'n': replace = '\n'; break; case 'f': replace = '\f'; break; case 'r': replace = '\r'; break; case '"': replace = '\"'; break; case '\'': replace = '\''; break; case '\\': replace = '\\'; break; case 'u': len = 6; // unicode escapes are six digits long, // including "slash u" String unicode = v.substring(i + 2, i + 6); replace = (char) Integer.parseInt(unicode, 16); // unicode i = i + 5; break; default: throw new RuntimeException("unknown escape character"); } result.add(new Constant.Integer(BigInteger.valueOf(replace))); i = i + 1; } } else { result.add(new Constant.Integer(BigInteger.valueOf(v.charAt(i)))); } } return result; } /** * Parse a token representing a byte value. Every such token is a sequence * of one or more binary digits ('0' or '1') followed by 'b'. For example, * "00110b" is parsed as the byte value 6. * * @param input * The token representing the byte value. * @return */ private byte parseByte(Token input) { String text = input.text; if (text.length() > 9) { syntaxError("invalid binary literal (too long)", input); } int val = 0; for (int i = 0; i != text.length() - 1; ++i) { val = val << 1; char c = text.charAt(i); if (c == '1') { val = val | 1; } else if (c == '0') { } else { syntaxError("invalid binary literal (invalid characters)", input); } } return (byte) val; } private Attribute.Source sourceAttr(int start, int end) { Token t1 = tokens.get(start); Token t2 = tokens.get(end); // FIXME: problem here with the line numbering ? return new Attribute.Source(t1.start, t2.end(), 0); } private void syntaxError(String msg, SyntacticElement e) { Attribute.Source loc = e.attribute(Attribute.Source.class); throw new SyntaxError(msg, entry, e); } private void syntaxError(String msg, Token t) { // FIXME: this is clearly not a sensible approach SyntacticElement unknown = new SyntacticElement.Impl() { }; unknown.attributes().add(new Attribute.Source(t.start, t.start + t.text.length() - 1, -1)); throw new SyntaxError(msg, entry, unknown); } /** * Represents a given amount of indentation. Specifically, a count of tabs * and spaces. Observe that the order in which tabs / spaces occurred is not * retained. * * @author David J. Pearce * */ private static class Indent extends Token { private final int countOfSpaces; private final int countOfTabs; public Indent(String text, int pos) { super(Token.Kind.Indent, text, pos); // Count the number of spaces and tabs int nSpaces = 0; int nTabs = 0; for (int i = 0; i != text.length(); ++i) { char c = text.charAt(i); switch (c) { case ' ': nSpaces++; break; case '\t': nTabs++; break; default: throw new IllegalArgumentException("Space or tab character expected"); } } countOfSpaces = nSpaces; countOfTabs = nTabs; } /** * Test whether this indentation is considered "less than or equivalent" * to another indentation. For example, an indentation of 2 spaces is * considered less than an indentation of 3 spaces, etc. * * @param other * The indent to compare against. * @return */ public boolean lessThanEq(Indent other) { return countOfSpaces <= other.countOfSpaces && countOfTabs <= other.countOfTabs; } /** * Test whether this indentation is considered "equivalent" to another * indentation. For example, an indentation of 3 spaces followed by 1 * tab is considered equivalent to an indentation of 1 tab followed by 3 * spaces, etc. * * @param other * The indent to compare against. * @return */ public boolean equivalent(Indent other) { return countOfSpaces == other.countOfSpaces && countOfTabs == other.countOfTabs; } } /** * An abstract indentation which represents the indentation of top-level * declarations, such as function declarations. This is used to simplify the * code for parsing indentation. */ private static final Indent ROOT_INDENT = new Indent("", 0); /** * The enclosing scope provides contextual information about the enclosing * scope for the given statement or expression being parsed. * * @author David J. Pearce * */ private class EnclosingScope { /** * The indent level of the enclosing scope. */ private final Indent indent; /** * The set of declared variables in the enclosing scope. */ private final HashSet<String> variables; /** * The set of field aliases in the enclosing scope. A field alias occurs * for a record declaration where, for convenience, we allow the type * invariant to refer directly to the field, rather than through a * declared variable. */ private final HashSet<String> fieldAliases; /** * The set of declared lifetimes in the enclosing scope. */ private final HashSet<String> lifetimes; /** * The set of all names that cannot be used for variables or lifetimes. * They are either in the variables or lifetimes set, or a special * lifetime, or they are unavailable because it is an unaccessible * lifetime from an outer scope. */ private final HashSet<String> unavailableNames; /** * A simple flag that tells us whether or not we are currently within a * loop. This is necessary to stop break or continue statements which * are written outside of a loop. */ private final boolean inLoop; public EnclosingScope() { this.indent = ROOT_INDENT; this.variables = new HashSet<>(); this.fieldAliases = new HashSet<>(); this.lifetimes = new HashSet<>(); this.unavailableNames = new HashSet<>(); this.inLoop = false; // prevent declaring these lifetimes this.unavailableNames.add("*"); this.unavailableNames.add("this"); } private EnclosingScope(Indent indent, Set<String> variables, Set<String> fieldAliases, Set<String> lifetimes, Set<String> unavailableNames, boolean inLoop) { this.indent = indent; this.variables = new HashSet<>(variables); this.fieldAliases = new HashSet<>(fieldAliases); this.lifetimes = new HashSet<>(lifetimes); this.unavailableNames = new HashSet<>(unavailableNames); this.inLoop = inLoop; } public Indent getIndent() { return indent; } public boolean isInLoop() { return inLoop; } /** * Check whether a given name corresponds to a declared variable in this * scope. * * @param name * @return */ public boolean isVariable(String name) { return this.variables.contains(name); } /** * Check whether a given name corresponds to a "field alias" in this * scope. A field alias occurs for a record declaration where, for * convenience, we allow the type invariant to refer directly to the * field, rather than through a declared variable. */ public boolean isFieldAlias(String name) { return fieldAliases.contains(name); } /** * Check whether a given name corresponds to a declared lifetime in this * scope. * * @param name * @return */ public boolean isLifetime(String name) { return name.equals("*") || this.lifetimes.contains(name); } /** * Checks that the given identifier is a declared lifetime. * * @param id * @throws SyntaxError * if the given identifier is not a lifetime */ public void mustBeLifetime(Token id) { if (!this.isLifetime(id.text)) { syntaxError("use of undeclared lifetime", id); } } /** * Check whether a given name is available, i.e. can be declared. * * @param id * identifier that holds the name to check * @throws SyntaxError * if the name is unavailable (already declared) */ public void checkNameAvailable(Token id) { if (this.unavailableNames.contains(id.text)) { // name is not available! syntaxError("name already declared", id); } } /** * Check whether a given name is available, i.e. can be declared. * * @param p * parameter that holds the name to check * @throws SyntaxError * if the name is unavailable (already declared) */ public void checkNameAvailable(Parameter p) { if (this.unavailableNames.contains(p.name)) { // name is not available! syntaxError("name already declared", p); } } /** * Declare a new variable in this scope. * * @param id * identifier that holds the name to declare * @throws SyntaxError * if the name is already declared */ public void declareVariable(Token id) { if (!this.unavailableNames.add(id.text)) { // name is not available! syntaxError("name already declared", id); } this.variables.add(id.text); } /** * Declare a new variable in this scope. * * @param p * parameter that holds the name to declare * @throws SyntaxError * if the name is already declared */ public void declareVariable(Parameter p) { if (!this.unavailableNames.add(p.name)) { // name is not available! syntaxError("name already declared", p); } this.variables.add(p.name); } /** * Declare a new field alias in this scope. * * @param alias * The field alias to declare */ public void declareFieldAlias(String alias) { fieldAliases.add(alias); } /** * Declare a new lifetime in this scope. * * @param id * identifier that holds the name to declare * @throws SyntaxError * if the name is already declared */ public void declareLifetime(Token id) { if (!this.unavailableNames.add(id.text)) { // name is not available! syntaxError("name already declared", id); } this.lifetimes.add(id.text); } /** * Make lifetime "this" available. */ public void declareThisLifetime() { this.lifetimes.add("this"); } /** * Create a new enclosing scope in which variables can be declared which * are remain invisible to this enclosing scope. All variables declared * in this enclosing scope remain declared in the new enclosing scope. * * @param indent * the indent level for the new scope * * @return */ public EnclosingScope newEnclosingScope() { return new EnclosingScope(indent, variables, fieldAliases, lifetimes, unavailableNames, inLoop); } /** * Create a new enclosing scope in which variables can be declared which * are remain invisible to this enclosing scope. All variables declared * in this enclosing scope remain declared in the new enclosing scope. * * @param indent * the indent level for the new scope * * @return */ public EnclosingScope newEnclosingScope(Indent indent) { return new EnclosingScope(indent, variables, fieldAliases, lifetimes, unavailableNames, inLoop); } /** * Create a new enclosing scope in which variables can be declared which * are remain invisible to this enclosing scope. All variables declared * in this enclosing scope remain declared in the new enclosing scope. * * @param indent * the indent level for the new scope * * @return */ public EnclosingScope newEnclosingScope(Indent indent, boolean inLoop) { return new EnclosingScope(indent, variables, fieldAliases, lifetimes, unavailableNames, inLoop); } /** * Create a new enclosing scope in which variables can be declared which * are remain invisible to this enclosing scope. All variables declared * in this enclosing scope remain declared in the new enclosing scope. * * @param indent * the indent level for the new scope * * @return */ public EnclosingScope newEnclosingScope(Set<String> contextLifetimes) { return new EnclosingScope(indent, variables, fieldAliases, contextLifetimes, unavailableNames, false); } } }