/* --------------------------------------------------------- * * __________ D E L T A S C R I P T * * (_________() * * / === / - A fast, dynamic scripting language * * | == | - Version 4.13.11.0 * * / === / - Developed by Adam R. Nelson * * | = = | - 2011-2013 * * / === / - Distributed under GNU LGPL v3 * * (________() - http://github.com/ar-nelson/deltascript * * * * --------------------------------------------------------- */ package com.sector91.delta.script.parser; import static com.sector91.delta.script.parser.DScriptLexer.*; import static com.sector91.delta.script.Operator.getUnaryOperator; import static com.sector91.delta.script.Operator.getBinaryOperator; import static com.sector91.delta.script.instrs.InstrType.*; import static com.sector91.util.StringTemplate.$; import java.io.IOException; import java.math.BigInteger; import java.util.*; import java.util.regex.Pattern; import com.sector91.delta.script.DScriptContext; import com.sector91.delta.script.DeltaScript; import com.sector91.delta.script.Operator; import com.sector91.delta.script.instrs.DSInstr; import com.sector91.delta.script.instrs.InstrType; import com.sector91.delta.script.objects.DS_Scope; import com.sector91.delta.script.objects.DS_Tag; import com.sector91.util.A; import com.sector91.util.ArrayUtil; import com.sector91.util._; /** * <p>Used internally to parse DeltaScript code into {@link CompiledDeltaScript} * objects.</p> * * <p>Users of the DeltaScript library should compile and execute scripts with * {@link DeltaScript}, rather than this class.</p> * * @author Adam R. Nelson * @version 4.13.11.0 */ public final class DScriptParser { public static final int LEFT_ASSOC = 0; public static final int RIGHT_ASSOC = 1; public static final int PREFIX = 2; public static final int POSTFIX = 4; public static final int STACKABLE = 8; static final char SPECIAL_CHAR = '$'; // PUBLIC METHODS // ---------------------------------------------------- private DScriptContext context; public DScriptParser(DScriptContext context) {this.context = context;} public DSInstr compile(String text, String scriptName) throws DScriptParserException {return compile(new DScriptLexer(text), scriptName);} public synchronized DSInstr compile(Iterator<LexToken> tokens, String scriptName) throws DScriptParserException { try { this.tokenStream = tokens; this.scriptName = scriptName; reset(); List<DSInstr> statements = new ArrayList<DSInstr>(); do { DSInstr statement = parseStatement(); if (statement != null) statements.add(statement); } while (!finished); return prognize(statements); } catch (DScriptLexerException ex) {throw new DScriptParserException(ex, scriptName, t);} catch (DScriptParserException ex) {throw ex;} catch (Exception ex) { ex.printStackTrace(); throw err("Java error occurred: " + ex.getMessage()); } } // UTILITY METHODS // ---------------------------------------------------- Iterator<LexToken> tokenStream; LexToken lastToken, t, nextToken; LinkedList<ArrowContext> arrowStack = new LinkedList<ArrowContext>(); String scriptName; int branchCount, loopDepth; boolean finished; private DScriptParserException err(String message) {return new DScriptParserException(message, scriptName, t);} private DScriptParserException unclosed(TokenType type) {return err("Unclosed " + type.friendlyDesc + ".");} //private DSInstr instr(InstrType type, String head, DSInstr... tail) //{return instr(type, t.start(), t.end()-t.start(), head, tail);} private DSInstr instr(InstrType type, LexToken tk, String head, DSInstr... tail) {return instr(type, tk.start(), tk.end()-tk.start(), head, tail);} private DSInstr wrapInstr(InstrType type, String head, DSInstr... tail) { return instr(type, tail[0].sourceStart(), len(tail[0], tail[tail.length-1]), head, tail); } private DSInstr instr(InstrType type, int start, int len, String head, DSInstr... tail) { // Perform some simple optimizations: if (type == NOT) { // not optimization: combine not and boolean operators. switch (tail[0].type()) { case NOT: return tail[0].tail()[0]; case EQ: return instr(NE, tail[0].sourceStart(), tail[0].sourceLength(), tail[0].head(), tail[0].tail()); case NE: return instr(EQ, tail[0].sourceStart(), tail[0].sourceLength(), tail[0].head(), tail[0].tail()); case LT: return instr(GE, tail[0].sourceStart(), tail[0].sourceLength(), tail[0].head(), tail[0].tail()); case LE: return instr(GT, tail[0].sourceStart(), tail[0].sourceLength(), tail[0].head(), tail[0].tail()); case GT: return instr(LE, tail[0].sourceStart(), tail[0].sourceLength(), tail[0].head(), tail[0].tail()); case GE: return instr(LT, tail[0].sourceStart(), tail[0].sourceLength(), tail[0].head(), tail[0].tail()); default: // Don't optimize. } } return DSInstr.create(type, context, start, len, head, tail); } private int len(DSInstr start, DSInstr end) {return len(start.sourceStart(), end);} private int len(int start, DSInstr end) {return end.sourceStart()+end.sourceLength()-start;} private void reset() { lastToken = null; t = null; nextToken = null; branchCount = 0; finished = false; read(); read(); } private boolean read() { lastToken = t; t = nextToken; boolean wasFinished = finished; if (tokenStream.hasNext()) nextToken = tokenStream.next(); else finished = true; return !wasFinished; } private TokenType lookahead() throws DScriptLexerException, IOException {return nextToken.type();} private DSInstr prognize(List<DSInstr> stack) { if (stack.isEmpty()) return instr(NONE, 0, 0, null); else if (stack.size() == 1) return stack.get(0); else { final DSInstr first = stack.get(0); final DSInstr last = stack.get(stack.size()-1); return instr(DO, first.sourceStart(), last.sourceStart()+last.sourceLength()-first.sourceStart(), null, stack.toArray(new DSInstr[stack.size()])); } } private DSInstr validateSpecial(String name, LexToken tk) throws DScriptParserException { if (name.charAt(0) != SPECIAL_CHAR) throw err("Special variables must start with " + SPECIAL_CHAR); final int s = tk.start(), l=tk.end()-tk.start(); if (DS_Scope.MODIFIABLE_SPECIAL_VARS.contains(DS_Tag.tag(name))) return instr(GET, s, l, name); if (name.matches("^"+Pattern.quote(Character.toString(SPECIAL_CHAR))+ "+$")) { int level = name.length(); if (level == 1) return instr($VAR, s,l, DS_Scope.S_SCOPE.stringValue()); else { return instr(CALL, s,l, null, instr($VAR, s,l, DS_Scope.S_ANCESTOR.stringValue()), instr(NONE, s,l, null, instr(SCALAR, s,l, Integer.toString(level-1)))); } } else return instr($VAR, s,l, name); } private boolean separator() throws Exception { if (t.type() != TokenType.SEPARATOR) return false; while (read() && t.type() == TokenType.SEPARATOR) {/* Do nothing. */} return true; } private void requireSeparator() throws Exception { if (!separator()) throw err("Unexpected " + t + "; separator or newline expected."); } // Generates a placeholder variable name, which can be used for anonymous // (underscore) arguments to arrow functions. private String generateUnderscoreVarName(int index) {return "%_" + index;} // BLOCKS // ---------------------------------------------------- // Starts on an 'if' token and parses until an 'xi' is encountered. // Also handles one-liner 'if ... then' statements. private DSInstr parseIfBlock() throws Exception { final int start = t.start(); if (!read()) throw unclosed(TokenType.IF); boolean elseEncountered = false; LinkedList<DSInstr> expressions = new LinkedList<DSInstr>(), branches = new LinkedList<DSInstr>(), statements = new LinkedList<DSInstr>(); DSInstr firstExpr = parseExpression(false, TokenType.THEN); expressions.add(firstExpr); boolean oneLiner = false; if (t.type() == TokenType.THEN && read()) if (t.type() != TokenType.SEPARATOR) oneLiner = true; if (!oneLiner) requireSeparator(); while (t.type() != TokenType.END_IF) { if (finished) throw unclosed(TokenType.IF); if (t.type() == TokenType.ELSE_IF) { if (oneLiner) throw err(t + " is not valid in one-liner if statement."); if (elseEncountered) throw err("Unexpected " + t + "."); DSInstr branch = prognize(statements); statements.clear(); branches.add(branch); read(); expressions.add(parseExpression(false)); separator(); } else if (t.type() == TokenType.ELSE) { final LexToken elseToken = t; if (elseEncountered) throw err("If statement cannot have more than one else."); elseEncountered = true; DSInstr branch = prognize(statements); statements.clear(); branches.add(branch); read(); expressions.add(instr(SCALAR, elseToken, "true")); if (!oneLiner) requireSeparator(); } else { DSInstr statement; if (oneLiner) statement = parseStatement(oneLiner, TokenType.ELSE, TokenType.ELSE_IF, TokenType.END_IF); else statement = parseStatement(); if (statement != null) statements.add(statement); separator(); } } branches.add(prognize(statements)); if (branches.size() > 1) { DSInstr[] conds = new DSInstr[branches.size()]; for (int i=0; i<conds.length; i++) { final DSInstr expr = expressions.pollFirst(); final DSInstr branch = branches.pollFirst(); conds[i] = instr(NONE, expr.sourceStart(), len(expr, branch), null, expr, branch); } return instr(COND, start, t.end()-start, null, conds); } else { final DSInstr expr = expressions.pollFirst(); final DSInstr branch = branches.pollFirst(); return instr(IF, start, t.end()-start, null, expr, branch); } } // Returns a 4-tuple constructed from ordered pairs, in this arrangement: // ((fromVar, fromInstr), (whileInstr, whereInstr)) private _<_<String, DSInstr>, _<DSInstr, DSInstr>> parseLoopHeader( boolean requireSeparator) throws Exception { // This is one of the more complex expressions to parse. // A loop header (after the 'loop' or 'for' keyword) has 3 parts: // - 'from' clause // - 'while' or 'until' clause // - 'where' clause (only if a 'from' or 'while'/'until' exists) String fromVar = null; DSInstr fromInstr = null; DSInstr whileInstr = null; DSInstr whereInstr = null; if (!read()) throw err("Incomplete loop header."); // 'from' clause if (t.type() == TokenType.IDENTIFIER) { fromVar = t.value(); if (!read()) throw err("Incomplete loop header."); if (t.type() != TokenType.LOOP_KW || !KW_FROMLOOP.equals(t.value())) throw err($("Expected '{}' after variable name.", KW_FROMLOOP)); if (!read()) throw err("Incomplete loop header."); fromInstr = parseExpression(true, TokenType.LOOP_KW); } while (t.type() == TokenType.LOOP_KW) { // 'while'/'until' clause if (KW_WHILELOOP.equals(t.value())||KW_UNTILLOOP.equals(t.value())) { boolean isWhile = KW_WHILELOOP.equals(t.value()); if (whileInstr != null) throw err($("A loop may only contain one '{}' or '{}'" + " clause.", KW_WHILELOOP, KW_UNTILLOOP)); if (!read()) throw err("Incomplete loop header."); whileInstr = parseExpression(true, TokenType.LOOP_KW); if (isWhile) whileInstr = wrapInstr(NOT, null, whileInstr); } // 'where' clause else if (KW_LOOPCOND.equals(t.value())) { if (whereInstr != null) throw err($("A loop may only contain one '{}' clause.", KW_LOOPCOND)); if (!read()) throw err("Incomplete loop header."); whereInstr = wrapInstr(NOT, null, parseExpression(true, TokenType.LOOP_KW)); } else if (KW_FROMLOOP.equals(t.value())) throw err($("A '{}' clause must be preceded by a variable" + " name.", KW_FROMLOOP)); else throw err("Unrecognized loop keyword: " + t.value()); } if (requireSeparator) { if (t.type() != TokenType.SEPARATOR) throw err("Expected loop header clause or separator, got " + t); } else if (t.type() != TokenType.SEPARATOR && t.type().closes == null) throw err("Expected loop header clause or separator, got " + t); return A._(A._(fromVar, fromInstr), A._(whileInstr, whereInstr)); } // Starts on a 'loop' token and parses until an 'xl' is encountered. private DSInstr parseLoopBlock() throws Exception { loopDepth++; final int start = t.start(); final _<_<String, DSInstr>, _<DSInstr, DSInstr>> header = parseLoopHeader(true); requireSeparator(); List<DSInstr> statements = new LinkedList<DSInstr>(); if (header._2._1 != null) // 'while'/'until' clause { statements.add(wrapInstr(IF, null, header._2._1, instr(BREAK, header._2._1.sourceStart(), header._2._1.sourceLength(), null))); } if (header._2._2 != null) // 'where' clause { statements.add(wrapInstr(IF, null, header._2._2, instr(CONTINUE, header._2._2.sourceStart(), header._2._2.sourceLength(), null))); } while (t.type() != TokenType.END_LOOP) { if (finished) throw unclosed(TokenType.LOOP); DSInstr statement = parseStatement(); if (statement != null) statements.add(statement); separator(); } loopDepth--; // Return the finished loop. final DSInstr body = prognize(statements); if (header._1._1 != null) return instr(FOR, start, t.end()-start, header._1._1, header._1._2, body); else return instr(LOOP, start, t.end()-start, null, body); } // Starts on a 'func' token and parses until an 'xf' is encountered. private DSInstr parseFunctionBlock() throws Exception { final int start = t.start(); if (!read()) throw unclosed(TokenType.FUNC); String name = null; if (t.type() == TokenType.IDENTIFIER) { name = t.value(); if (!read()) throw unclosed(TokenType.FUNC); } // Read the function arguments. boolean oneLiner = false; DSInstr args; if (t.type() == TokenType.O_PAREN) { final int parenStart = t.start(); LinkedList<DSInstr> argInstrs = new LinkedList<DSInstr>(); if (!read()) throw unclosed(TokenType.FUNC); boolean expectingSeparator = false; while (t.type() != TokenType.C_PAREN) { if (t.type() == TokenType.IDENTIFIER) { if (expectingSeparator) throw err("Argument names must be separated with" + " commas, semicolons, or newlines."); argInstrs.add(instr(GET, t, t.value())); expectingSeparator = true; } else if (t.type() == TokenType.SEPARATOR) expectingSeparator = false; else throw err($("Unexpected {} in function argument list." + " Only identifiers are valid here.", t)); if (!read()) throw unclosed(TokenType.O_PAREN); } if (!read()) throw unclosed(TokenType.FUNC); oneLiner = !separator(); args = instr(NONE, parenStart, t.end()-parenStart, null, argInstrs.toArray(new DSInstr[argInstrs.size()])); } else { args = instr(NONE, start, lastToken.end()-start, null); requireSeparator(); } // Read the function statements. LinkedList<DSInstr> statements = new LinkedList<DSInstr>(); if (oneLiner) { statements.add(parseStatement(true, TokenType.END_FUNC)); if (t.type() != TokenType.END_FUNC) throw err($("Expected end of function ('{}').", KW_ENDFUNC)); } else while (t.type() != TokenType.END_FUNC) { if (finished) throw unclosed(TokenType.FUNC); DSInstr statement = parseStatement(); if (statement != null) statements.add(statement); separator(); } // Return the finished function definition. if (name != null) return wrapInstr(DEF, name, instr( FUNC, start, t.end()-start, name, args, prognize(statements))); else return instr(FUNC, start, t.end()-start, KW_FUNC, args, prognize(statements)); } // Starts on a 'scope' token and parses until an 'xs' is encountered. private DSInstr parseScopeBlock() throws Exception { final int start = t.start(); if (!read()) throw unclosed(TokenType.SCOPE); String name = null; if (t.type() == TokenType.IDENTIFIER) { name = t.value(); if (!read()) throw unclosed(TokenType.SCOPE); } requireSeparator(); List<DSInstr> statements = new LinkedList<DSInstr>(); while (t.type() != TokenType.END_SCOPE) { if (finished) throw unclosed(TokenType.SCOPE); DSInstr statement = parseStatement(); if (statement != null) statements.add(statement); separator(); } DSInstr scopeInstr = instr(SCOPE, start, t.end()-start, name, prognize(statements)); if (name != null) return wrapInstr(DEF, name, scopeInstr); else return scopeInstr; } // Starts on a 'branch' token and parses until an 'xb' is encountered. private DSInstr parseBranchBlock() throws Exception { final int start = t.start(); if (!read()) throw unclosed(TokenType.BRANCH); LexToken deftk = null; DSInstr branchVarInstr = null; if (t.type() != TokenType.SEPARATOR) branchVarInstr = parseExpression(true); requireSeparator(); final LinkedList<DSInstr> expressions = new LinkedList<DSInstr>(), branches = new LinkedList<DSInstr>(), statements = new LinkedList<DSInstr>(); while (t.type() != TokenType.END_BRANCH) { if (finished) throw unclosed(TokenType.BRANCH); if (t.type() == TokenType.CASE) { if (deftk != null) throw err($("Unexpected '{}' after '{}'.", KW_CASE, KW_DEFAULT)); if (statements.size() > 0) { DSInstr branch = prognize(statements); statements.clear(); branches.add(branch); } if (!read()) throw unclosed(TokenType.BRANCH); DSInstr expr = parseExpression(false); expressions.add(expr); requireSeparator(); } else if (t.type() == TokenType.DEFAULT_CASE) { if (deftk != null) throw err($("A {} statement can only have one '{}' branch.", KW_BRANCH, KW_DEFAULT)); deftk = t; if (statements.size() > 0) { DSInstr branch = prognize(statements); statements.clear(); branches.add(branch); } if (!read()) throw unclosed(TokenType.BRANCH); expressions.add(null); requireSeparator(); } else { DSInstr statement = parseStatement(); if (statement != null) statements.add(statement); separator(); } } DSInstr branch = prognize(statements); branches.add(branch); DSInstr[] conds = new DSInstr[branches.size()]; int s = branches.size(); for (int i=0; i<s; i++) { final DSInstr expr = expressions.pollFirst(); final DSInstr br = branches.pollFirst(); if (expr == null) { if (branchVarInstr == null) conds[i] = instr(NONE, deftk.start(), len(deftk.start(),br), null, instr(SCALAR, deftk, KW_TRUE), br); else conds[i] = instr(NONE, deftk.start(), len(deftk.start(),br), null, br); } else conds[i] = wrapInstr(NONE, null, expr, br); } DSInstr in; if (branchVarInstr != null) { final DSInstr[] instrs = new DSInstr[conds.length+1]; instrs[0] = branchVarInstr; System.arraycopy(conds, 0, instrs, 1, conds.length); in = instr(BRANCH, start, t.end()-start, null, instrs); } else in = instr(COND, start, t.end()-start, null, conds); return in; } // Starts on a 'do' token and parses until an 'xd' is encountered. private DSInstr parseDoBlock() throws Exception { if (!read()) throw unclosed(TokenType.BLOCK); requireSeparator(); final List<DSInstr> statements = new LinkedList<DSInstr>(); while (t.type() != TokenType.END_BLOCK) { if (finished) throw unclosed(TokenType.BLOCK); DSInstr statement = parseStatement(); if (statement != null) statements.add(statement); separator(); } return prognize(statements); } // SYNTAX ELEMENTS // ---------------------------------------------------- private DSInstr parseStatement() throws Exception {return parseStatement(false);} // Parses a single line (terminated by one of { , ; \n } or any token in // `endTokens`) of DeltaScript code into an instruction. If `oneLiner` is // true, any separators or newlines that occur before the actual code will // cause an exception. private DSInstr parseStatement(boolean oneLiner, TokenType... endTokens) throws Exception { DSInstr instr = null; if (!oneLiner) while (t.type() == TokenType.SEPARATOR) if (!read()) return null; switch (t.type()) { case RETURN: final LexToken rettk = t; if (!read() || (!oneLiner && t.type() == TokenType.SEPARATOR) || t.type().closes != null || ArrayUtil.contains(endTokens, t.type())) instr = instr(RETURN, rettk, null, instr(NONE, rettk, null)); else { DSInstr retExpr = parseExpression(false); instr = instr(RETURN, rettk.start(), len(rettk.start(),retExpr), null, retExpr); } break; case BREAK: if (loopDepth <= 0) throw err($("A {} statement cannot occur outside a loop.", t)); instr = instr(BREAK, t, null); if (read() && !( (!oneLiner && t.type() == TokenType.SEPARATOR) || t.type().closes != null || ArrayUtil.contains(endTokens, t.type()))) throw err($("Unexpected {} after {}.", t, KW_BREAK)); break; case CONTINUE: if (loopDepth <= 0) throw err($("A {} statement cannot occur outside a loop.", t)); instr = instr(CONTINUE, t, null); if (read() && !( (!oneLiner && t.type() == TokenType.SEPARATOR) || t.type().closes != null || ArrayUtil.contains(endTokens, t.type()))) throw err($("Unexpected {} after {}.", t, KW_CONTINUE)); break; case INCLUDE: final int start = t.start(); if (!read()) throw err("Incomplete " + KW_INCLUDE + " statement."); DSInstr incExpr = parseExpression(false); instr = instr(INCLUDE, start, len(start, incExpr), null, incExpr); break; default: instr = parseExpression(oneLiner, endTokens); } return instr; } private DSInstr parseExpression() throws Exception {return parseExpression(false);} // Parses a single DeltaScript expression (terminated by one of { , ; \n } // or any token in `endTokens`) into an instruction. If `oneLiner` is true, // any separators or newlines that occur before the actual code will cause // an exception. Unlike `parseStatement`, this method will not parse // `return`, `break`, `continue`, or `include` statements. private DSInstr parseExpression(boolean oneLiner, TokenType... endTokens) throws Exception { // Operators are parsed using the shunting-yard algorithm. "symbols" are // the operands of the operators. final LinkedList<DSInstr> symbols = new LinkedList<DSInstr>(); final LinkedList<Operator> ops = new LinkedList<Operator>(); final LinkedList<Operator> unaryOps = new LinkedList<Operator>(); final LinkedList<Operator> dotOps = new LinkedList<Operator>(); if (oneLiner) { if (t.type() == TokenType.SEPARATOR) throw err(t + " is not valid after " + lastToken + "."); } else { while (t.type() == TokenType.SEPARATOR) if (!read()) throw err("Expression expected."); } boolean expectingOperator = false; readLoop: do { if (t.type() == TokenType.SEPARATOR || t.type().closes != null || ArrayUtil.contains(endTokens, t.type())) break; if (!expectingOperator) { DSInstr symbol = null; switch (t.type()) { case DEF: symbol = parseDefStatement(); symbols.push(symbol); break readLoop; case FIELD: symbol = parseFieldStatement(); symbols.push(symbol); break readLoop; case IF: symbol = parseIfBlock(); if (t.type() != TokenType.END_IF) throw err($("Unexpected {}; expected end if ({}).", t, KW_ENDIF)); break; case BRANCH: symbol = parseBranchBlock(); if (t.type() != TokenType.END_BRANCH) throw err($("Unexpected {}; expected end branch ({}).", t, KW_ENDBRANCH)); break; case LOOP: symbol = parseLoopBlock(); if (t.type() != TokenType.END_LOOP) throw err($("Unexpected {}; expected end loop ({}).", t, KW_ENDLOOP)); break; case FUNC: symbol = parseFunctionBlock(); if (t.type() != TokenType.END_FUNC) throw err($("Unexpected {}; expected end func ({}).", t, KW_ENDFUNC)); break; case ARROW_FUNC: symbol = parseArrowFunction(); symbols.push(symbol); break readLoop; case SCOPE: symbol = parseScopeBlock(); if (t.type() != TokenType.END_SCOPE) throw err($("Unexpected {}; expected end scope ({}).", t, KW_ENDSCOPE)); break; case BLOCK: symbol = parseDoBlock(); if (t.type() != TokenType.END_BLOCK) throw err($("Unexpected {}; expected end do ({}).", t, KW_ENDBLOCK)); break; case O_PAREN: read(); symbol = parseExpression(); if (t.type() != TokenType.C_PAREN) throw err($("Unexpected {}; expected closing paren.", t)); break; case PREFIX_OP: Operator op = getUnaryOperator(t.value()); if (op != null) unaryOps.push(op); else throw err("No such prefix operator: " + t.value()); break; case STRING: symbol = instr(STRING, t, t.value()); break; case TAG: symbol = instr(TAG, t, t.value()); break; case BOOLEAN: symbol = instr(SCALAR, t, t.value()); break; case BLANK: symbol = instr(NONE, t, null); break; case IDENTIFIER: if (t.value().charAt(0) == SPECIAL_CHAR) symbol = validateSpecial(t.value(), t); else symbol = instr(GET, t, t.value()); break; case UNDERSCORE: if (arrowStack.isEmpty()) throw err("The variable name underscore ('_') is not" + " valid outside an arrow function."); symbol = instr(GET, t, generateUnderscoreVarName( arrowStack.peek().underscores++)); break; case STDLIB_CALL: { final int start = t.start(); if (!read() || t.type() != TokenType.IDENTIFIER) throw err(".. (dot-dot) operator must be followed by" + " an identifier."); symbol = instr(STDLIB, start, t.end()-start, t.value()); break; } case NUMBER: case BIN_NUMBER: case OCT_NUMBER: case HEX_NUMBER: symbol = parseNumber(); break; case O_BRACKET: case O_ANGLE: symbol = parseSequence(); break; case O_BRACE: symbol = parseMap(); break; default: throw err("Expected identifier, got " + t + "."); } if (symbol != null) { symbols.push(symbol); expectingOperator = true; } } else { if (ArrayUtil.contains(endTokens, t)) break readLoop; String opStr; switch (t.type()) { case OPERATOR: opStr = t.value(); break; case POSTFIX_OP: opStr = t.value(); if (ops.peek() == Operator.DOT) dotOps.push(getUnaryOperator(opStr)); else unaryOps.push(getUnaryOperator(opStr)); continue; case DOT: opStr = "."; break; case O_PAREN: symbols.push(parseCall(symbols.pop())); if (t.type() != TokenType.C_PAREN) throw err("Unexpected " + t + "; expected closing paren."); continue; case O_BRACKET: symbols.push(parseIndex(symbols.pop())); if (t.type() != TokenType.C_BRACKET) throw err("Unexpected " + t + "; expected closing bracket."); continue; default: throw err("Unexpected " + t + "."); } Operator op = getBinaryOperator(opStr); if (op == null) throw err("Not a valid operator: " + opStr); // Apply all unary operators before a binary operator. if (!symbols.isEmpty()) { if (op == Operator.DOT) { // The dot operator is a special case; unary // operators should expand around it. Add operators // to apply to a dot operation to a special stack. if (!dotOps.isEmpty() && !unaryOps.isEmpty()) throw err("A dot cannot be followed by an" + " operator."); while (!unaryOps.isEmpty()) dotOps.push(unaryOps.pollLast()); } else while (!unaryOps.isEmpty()) symbols.push(applyUnaryOperator(symbols.pop(), unaryOps.pop())); } if (!ops.isEmpty()) { Operator lastOp = ops.peek(); while ((op.associativity == LEFT_ASSOC && op.precedence - lastOp.precedence <= 0) || (op.associativity == RIGHT_ASSOC && op.precedence - lastOp.precedence < 0)) { popOnOps(symbols, ops, dotOps); if (ops.isEmpty()||symbols.size()<2) break; lastOp = ops.peek(); } } if (!symbols.isEmpty() && op != Operator.DOT) while (!dotOps.isEmpty()) symbols.push(applyUnaryOperator(symbols.pop(), dotOps.pop())); ops.push(op); expectingOperator = false; } } while (read()); // Apply any remaining unary operators. if (!symbols.isEmpty()) { while (!unaryOps.isEmpty()) symbols.push(applyUnaryOperator(symbols.pop(), unaryOps.pop())); } // Put the contents of the symbol and operator stacks together. while (!ops.isEmpty()) { if (symbols.size() > 1) popOnOps(symbols, ops, dotOps); else throw err("Expression expected after \"" + ops.pop() + "\"."); } if (!symbols.isEmpty()) { while (!dotOps.isEmpty()) symbols.push(applyUnaryOperator(symbols.pop(), dotOps.pop())); } if (symbols.size() == 1) {return symbols.pop();} else if (symbols.isEmpty()) {throw err("Expression expected.");} else {throw err("Invalid expression.");} } // Utility method for `parseExpression`. Pops an operator and a "symbol" // instr off of the stack, and merges them with the existing stack. private void popOnOps(Deque<DSInstr> symbols, Deque<Operator> ops, Deque<Operator> dotOps) throws DScriptParserException { Operator op = ops.pop(); // POP ON OPS DSInstr instr2 = symbols.pop(); DSInstr instr1 = symbols.pop(); if (op.isAssignment) { if (op != Operator.ASSIGN) { String subOp = op.str.substring(0, op.str.length()-1); instr2 = instr(OP, instr1.sourceStart(), len(instr1, instr2), subOp, instr1, instr2); } if (instr1.isSingleton()) { if (instr1.type() == GET) symbols.push(instr(SET, instr1.sourceStart(), len(instr1, instr2), instr1.head(), instr2)); else if (instr1.type() == $VAR) throw err(instr1.head() + " is not an assignable special" + " variable."); else throw err("The left-hand side of an assignment must" + " be a variable."); } else { if (instr1.type() == DOTGET) symbols.push(instr(DOTSET, instr1.sourceStart(), len(instr1, instr2), instr1.head(), instr1.tail()[0], instr2)); else if (instr1.type() == INDEX) symbols.push(instr(INDEXSET, instr1.sourceStart(), len(instr1, instr2), null, instr1.tail()[0], instr1.tail()[1],instr2)); else throw err("The left-hand side of an assignment must" + " be a variable."); } } else if (op == Operator.DOT) {symbols.push(dot(instr1, instr2));} else {symbols.push(opInstr(op, instr1, instr2));} } // Creates an instruction from an operator and two operands, handling any // special-case operators that have their own instruction type. private DSInstr opInstr(Operator op, DSInstr a, DSInstr b) { switch (op) { case EQUAL: return wrapInstr(EQ, null, a, b); case NOT_EQUAL: return wrapInstr(NE, null, a, b); case LESS_THAN: return wrapInstr(LT, null, a, b); case LESS_OR_EQ: return wrapInstr(LE, null, a, b); case GREATER_THAN: return wrapInstr(GT, null, a, b); case GREATER_OR_EQ: return wrapInstr(GE, null, a, b); case INSTANCEOF: return wrapInstr(TYPECHK, null, a, b); case NOT_INSTANCEOF: return wrapInstr(NOT, null, wrapInstr(TYPECHK, null, a, b)); case AND: return wrapInstr(AND, null, a, b); case OR: return wrapInstr(OR, null, a, b); case IN: // 'in' is reversed: return instr(OP, a.sourceStart(), len(a, b), op.str, b, a); default: return wrapInstr(OP, op.str, a, b); } } // Creates a new instruction from an unary operator and an existing // instruction. Handles special cases such as the dot operator (which unary // operators should wrap around). private DSInstr applyUnaryOperator(DSInstr instr, Operator op) throws DScriptParserException { // TODO: Correctly track unary operators' source location. if (op.isAssignment) { if (instr.type() == GET) return wrapInstr(SET, instr.head(), wrapInstr(OP,op.str,instr)); else if (instr.type() == DOTGET) return wrapInstr(DOTSET, instr.head(), instr.tail()[0], wrapInstr(OP, op.str, instr)); else if (instr.type() == INDEX) return wrapInstr(INDEXSET, instr.head(), instr.tail()[1], wrapInstr(OP, op.str, instr)); else throw err($("The {} operator can only be applied to a" + " variable.", op)); } else if (op == Operator.NOT) return wrapInstr(NOT, null, instr); else return wrapInstr(OP, op.str, instr); } // Creates an instruction from two instructions joined by the dot (`.`) // operator. private DSInstr dot(DSInstr instr1, DSInstr instr2) throws DScriptParserException { switch (instr2.type()) { case GET: return instr(DOTGET, instr1.sourceStart(), len(instr1, instr2), instr2.head(), instr1); case CALL: return instr(CALL, instr1.sourceStart(), len(instr1, instr2), null, dot(instr1, instr2.tail()[0]), instr2.tail()[1]); case INDEX: return instr(INDEX, instr1.sourceStart(), len(instr1, instr2), null, dot(instr1, instr2.tail()[0]), instr2.tail()[1]); case $VAR: return instr(DOT$VAR, instr1.sourceStart(), len(instr1, instr2), instr2.head(), instr1); /*case OP: if ( instr2.operator.isUnary && (instr2.operator.associativity & POSTFIX) > 0) { System.out.println("i1: " + instr1 + ", i2: " + instr2); return applyUnaryOperator(dot(instr1, instr2.getCdr()[0]), instr2.operator); } // If not a postfix operator, fall through.*/ default: throw err("A dot must be followed by an identifier or function" + " call."); } } // Starting at the opening paren of a function call instruction, parses the // argument list and creates a CALL instruction which calls the object // returned by `lastInstr`. private DSInstr parseCall(DSInstr lastInstr) throws Exception { DSInstr args = parseSequence(); if (args.type() != NONE) throw err("Argument list expected."); return wrapInstr(CALL, null, lastInstr, args); } private DSInstr parseIndex(DSInstr lastInstr) throws Exception { if (t.type() != TokenType.O_BRACKET || !read()) throw err("Index expected."); final DSInstr indexInstr = parseExpression(false, TokenType.C_BRACKET); if (t.type() != TokenType.C_BRACKET) throw err($("Unexpected {}. Expected closing bracket.", t)); return instr(INDEX, lastInstr.sourceStart(), t.end()-lastInstr.sourceStart(), null, lastInstr, indexInstr); } private DSInstr parseDefStatement() throws Exception { final int start = t.start(); if (!read()) throw err($("Incomplete {} statement.", KW_DEF)); final DSInstr expr = parseExpression(true); if (expr.type() == SET) return instr(DEF, start, len(start,expr), expr.head(), expr.tail()); else if (expr.type() == GET) return wrapInstr(DEF, expr.head(), instr(NONE, start, len(start,expr), null)); else if (expr.type() == $VAR) throw err(expr.head() + " is not an assignable special variable."); else throw err($("A {} statement must be an assignment to a local" + " variable.", KW_DEF)); } private DSInstr parseFieldStatement() throws Exception { final int start = t.start(); if (!read()) throw err($("Incomplete {} statement.", KW_FIELD)); final DSInstr expr = parseExpression(true); if (expr.type() == SET || expr.type() == DEF) return instr(FIELD, start, len(start, expr), expr.head(), expr.tail()); else throw err("A " + KW_FIELD + " statement must be an assignment to" + " a local variable."); } private DSInstr parseArrowFunction() throws Exception { int start = t.start(); arrowStack.push(new ArrowContext()); read(); final DSInstr expr = parseExpression(true); final ArrowContext actx = arrowStack.pop(); final DSInstr[] args = new DSInstr[actx.underscores]; for (int i=0; i<args.length; i++) args[i] = instr(GET, start, KW_ARROW.length(), generateUnderscoreVarName(i)); return instr(FUNC, start, len(start, expr), KW_ARROW, instr(NONE, start, KW_ARROW.length(), null, args), expr); } // CONSTANTS // ---------------------------------------------------- private DSInstr parseNumber() throws Exception { String num; int start = t.start(), end = t.end(); switch (t.type()) { case NUMBER: num = t.value(); if (lookahead() == TokenType.DOT && read()) { if (read()) { if (t.type() == TokenType.DEC_SUFFIX) { num += "." + t.value(); if (lookahead() == TokenType.EXP_SUFFIX && read()) num += "E" + t.value(); end = t.end(); } else if (t.type() == TokenType.IDENTIFIER) return instr(DOTGET, start, t.end()-start, t.value(), instr(SCALAR, start, end-start, num)); else throw err("Unexpected " + t + " after dot after" + " number literal. Expected decimal digits or" + " identifier."); } else throw err("Unterminated decimal literal."); } else if (lookahead() == TokenType.EXP_SUFFIX && read()) { num += "E" + t.value(); end = t.end(); } if (lookahead() == TokenType.NTYPE && read()) instr(SCALAR, start, t.end()-start, num, instr(NONE, t, t.value())); break; case BIN_NUMBER: num = new BigInteger(t.value(), 2).toString(); break; case OCT_NUMBER: num = new BigInteger(t.value(), 8).toString(); break; case HEX_NUMBER: num = new BigInteger(t.value(), 16).toString(); break; default: throw err(t + " cannot be parsed as a number."); } return instr(SCALAR, start, end-start, num); } private DSInstr parseSequence() throws Exception { final LinkedList<DSInstr> items = new LinkedList<DSInstr>(); final int start = t.start(); final TokenType openingToken = t.type(); final TokenType closingToken; switch (openingToken) { case O_PAREN: closingToken = TokenType.C_PAREN; break; case O_BRACKET: closingToken = TokenType.C_BRACKET; break; case O_ANGLE: closingToken = TokenType.C_ANGLE; break; default: throw err(t + " is not a valid sequence-start token."); } boolean expectingSeparator = false; read(); while (t.type() != closingToken) { if (finished) throw unclosed(openingToken); if (t.type() == TokenType.SEPARATOR) { expectingSeparator = false; read(); } else { if (expectingSeparator) throw err("Expected separator, newline, or " + closingToken.friendlyDesc + "; got " + t + "."); items.add(parseExpression(false, TokenType.ELLIPSIS, TokenType.FOR_COMPRH, closingToken)); // Handle expansions: if (t.type() == TokenType.ELLIPSIS) { read(); final DSInstr item = items.pollLast(); items.add(instr(EXPAND, item.sourceStart(), t.end()-item.sourceStart(), null, item)); } // Handle comprehensions: else if (t.type() == TokenType.FOR_COMPRH) { final LinkedList<DSInstr> instrs =new LinkedList<DSInstr>(); final DSInstr comprhInstr = items.pollLast(); instrs.add(comprhInstr); final _<_<String, DSInstr>, _<DSInstr, DSInstr>> header = parseLoopHeader(false); if (header._2._1 != null) // 'while'/'until' clause { final DSInstr i = header._2._1; instrs.addFirst( instr(IF, i.sourceStart(), i.sourceLength(), null, i, instr(BREAK, i.sourceStart(), i.sourceLength(), null))); } if (header._2._2 != null) // 'where' clause { final DSInstr i = header._2._2; instrs.addFirst( instr(IF, i.sourceStart(), i.sourceLength(), null, i, instr(CONTINUE, i.sourceStart(), i.sourceLength(), null))); } items.add(wrapInstr(EXPAND, null, instr(COMPRH, comprhInstr.sourceStart(), len(comprhInstr, instrs.getLast()), header._1._1, header._1._2, prognize(instrs)))); } expectingSeparator = true; } } DSInstr[] tail = items.toArray(new DSInstr[items.size()]); switch (openingToken) { case O_PAREN: return instr(NONE, start, t.end()-start, null, tail); case O_BRACKET: return instr(ARRAY, start, t.end()-start, null, tail); case O_ANGLE: return instr(VECTOR, start, t.end()-start, null, tail); default: throw err(openingToken.friendlyDesc + " is not a valid sequence-start token."); } } private DSInstr parseMap() throws Exception { final int start = t.start(); final LinkedList<DSInstr> keys = new LinkedList<DSInstr>(); final LinkedList<DSInstr> values = new LinkedList<DSInstr>(); boolean expectingSeparator = false; read(); while (t.type() != TokenType.C_BRACE) { if (finished) throw unclosed(TokenType.O_BRACE); if (t.type() == TokenType.SEPARATOR) { expectingSeparator = false; read(); continue; } else { if (expectingSeparator) throw err($("Expected separator, newline, or {}; got {}.", TokenType.C_BRACE.friendlyDesc, t)); DSInstr key = parseExpression(false, TokenType.COLON); if (key.type() == GET && !key.head().startsWith("%")) key = instr(TAG, key.sourceStart(), key.sourceLength(), key.head()); keys.add(key); while (t.type() == TokenType.SEPARATOR && read()) {} if (t.type() != TokenType.COLON) throw err($("Unexpected {} after map key; {} expected.", t, TokenType.COLON.friendlyDesc)); read(); final DSInstr value = parseExpression(false); if (value == null) throw err("Map value expected."); values.add(value); expectingSeparator = true; } } if (keys.size() != values.size()) throw err("The number of keys and values in a map must match."); final int count = keys.size(); DSInstr[] cdr = new DSInstr[count*2]; for (int i=0; i<count; i++) { cdr[i*2] = keys.pollFirst(); cdr[i*2+1] = values.pollFirst(); } return instr(MAP, start, t.end()-start, null, cdr); } private final class ArrowContext { int underscores = 0; } }