/*
* Copyright (c) 2007 BUSINESS OBJECTS SOFTWARE LIMITED
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of Business Objects nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* LambdaLifter.java
* Created: Feb 2, 2001
* By: Bo Ilic
*/
package org.openquark.cal.compiler;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.openquark.cal.util.ArrayStack;
/**
* This class implements a lambda lifter for CAL. Lambda lifting is the process in which lambda
* expressions and local function definitions that bind arguments are removed from the
* program. This is done by rewriting the original expressions using certain new auxiliary
* global functions. The main reference is Peyton-Jones and Lester, chapter 6.
* <p>
* The lambda lifter also (optionally) lifts case expressions. What this means is that every
* case expression will occur as the first thing on the rhs of a top level function e.g.
* function x1 ... x2 = case e of ...
* This is needed by the g and lecc machines in order to be lazy about evaluating the expression e
* under certain circumstances.
* <p>
*
* Creation date: (2/2/01 4:40:54 PM)
* @author Bo Ilic
*/
final class LambdaLifter {
/** Set to true to have debug info printed while running the lambda lifter. */
private static final boolean DEBUG_INFO = false;
/** The compiler on whose behalf lifting is done; its type checker is asked whether a lifted argument is already evaluated. */
private final CALCompiler compiler;
/** The module to which the lifted entities belong. */
private final ModuleName currentModuleName;
/** Used to find the free variables of let definitions and lambda expressions. */
private final FreeVariableFinder freeVariableFinder;
/**
* The top-level function definition nodes created by lifting. Cleared at the start of lift() and
* appended to the outer definition list at the end of lift().
*/
private final List<ParseTreeNode> liftedFunctionList;
/**
* information about the local functions, case expressions and lambda
* expressions that are lifted from a given function definition. Used to give meaningful
* names to the lifted functions.
* A fresh instance is created for each top-level function processed by lift(), and the field is
* reset to null afterwards.
*/
private LambdaLiftInfo lambdaLiftInfo;
/** true if you want case expressions to be lifted, otherwise case expressions can occur at any depth. */
private static final boolean LIFT_CASES = true;
// Running totals of the lifts performed. They are static, so they are shared by every LambdaLifter
// instance, and lift() only updates and prints them when DEBUG_INFO is true.
private static int totalLambdaLiftCount;
private static int totalLocalFunctionLiftCount;
private static int totalCaseLiftCount;
private static int totalDCSelectionLiftCount;
/**
 * Constructs a LambdaLifter that lifts definitions belonging to a single module.
 *
 * @param compiler the compiler on whose behalf lifting is done; its type checker is consulted
 *      when deciding whether a lifted argument is already evaluated. Must not be null.
 * @param freeVariableFinder used to find the free variables of let definitions and lambda
 *      expressions. Must not be null.
 * @param currentModuleName the module to which the lifted entities belong. Must not be null.
 * @throws NullPointerException if any argument is null; the message names the offending argument.
 */
LambdaLifter(CALCompiler compiler, FreeVariableFinder freeVariableFinder, ModuleName currentModuleName) {
    // Check each argument separately so that a failure says which one was null.
    if (compiler == null) {
        throw new NullPointerException("compiler must not be null");
    }
    if (freeVariableFinder == null) {
        throw new NullPointerException("freeVariableFinder must not be null");
    }
    if (currentModuleName == null) {
        throw new NullPointerException("currentModuleName must not be null");
    }

    this.compiler = compiler;
    this.freeVariableFinder = freeVariableFinder;
    this.currentModuleName = currentModuleName;
    this.liftedFunctionList = new ArrayList<ParseTreeNode>();
}
/**
 * Builds the parse tree of a new private top-level function and records it in
 * liftedFunctionList.
 * <p>
 * If freeVariablesSet = y1 ... ym and paramListNode holds x1 ... xn, with the defining
 * expression rhsExpr as the next sibling of paramListNode, then the definition
 * functionName y1 ... ym x1 ... xn = rhsExpr
 * is created. The existing parameter nodes and the rhs expression node are re-linked into the
 * new definition, not copied.
 *
 * @param functionName name of the new top-level function.
 * @param freeVariablesSet the variables y1, ... ym, added as leading parameters in iteration order.
 * @param paramListNode holds the variables x1,...,xn; its next sibling is the defining expression.
 */
private void addLiftedFunction(String functionName, Set<String> freeVariablesSet, ParseTreeNode paramListNode) {
    // Skeleton of the definition: caldoc, access modifier, name, parameter list, rhs expression.
    final ParseTreeNode definitionNode =
        new ParseTreeNode(CALTreeParserTokenTypes.TOP_LEVEL_FUNCTION_DEFN, "TOP_LEVEL_FUNCTION_DEFN");

    final ParseTreeNode calDocNode =
        new ParseTreeNode(CALTreeParserTokenTypes.OPTIONAL_CALDOC_COMMENT, "OPTIONAL_CALDOC_COMMENT");
    definitionNode.setFirstChild(calDocNode);

    final ParseTreeNode accessNode =
        new ParseTreeNode(CALTreeParserTokenTypes.ACCESS_MODIFIER, "ACCESS_MODIFIER");
    calDocNode.setNextSibling(accessNode);

    final ParseTreeNode privateKeywordNode =
        new ParseTreeNode(CALTreeParserTokenTypes.LITERAL_private, "private");
    accessNode.setFirstChild(privateKeywordNode);

    final ParseTreeNode nameNode = new ParseTreeNode(CALTreeParserTokenTypes.VAR_ID, functionName);
    accessNode.setNextSibling(nameNode);

    final ParseTreeNode newParamListNode =
        new ParseTreeNode(CALTreeParserTokenTypes.FUNCTION_PARAM_LIST, "FUNCTION_PARAM_LIST");
    nameNode.setNextSibling(newParamListNode);

    // The type expression may be attached to the parameter list (for lambda and case
    // expressions), so carry it over to the new list.
    newParamListNode.setTypeExprForFunctionParamList(paramListNode.getTypeExprForFunctionParamList());

    final ParseTreeNode definingExprNode = paramListNode.nextSibling();
    newParamListNode.setNextSibling(definingExprNode);

    // A tail pointer keeps building the parameter chain linear in the number of parameters;
    // appending through addChild would be quadratic.
    ParseTreeNode tailNode = null;

    // First the free variables y1 ... ym, each flagged as a lifted argument.
    for (final String freeVarName : freeVariablesSet) {
        final ParseTreeNode freeVarNode = getArgVarNode(freeVarName);
        freeVarNode.setIsLiftedArgument(true);
        if (tailNode == null) {
            newParamListNode.setFirstChild(freeVarNode);
        } else {
            tailNode.setNextSibling(freeVarNode);
        }
        tailNode = freeVarNode;
    }

    // Then the original parameters x1 ... xn, reusing the existing nodes.
    for (ParseTreeNode paramNode = paramListNode.firstChild(); paramNode != null; paramNode = paramNode.nextSibling()) {
        if (tailNode == null) {
            newParamListNode.setFirstChild(paramNode);
        } else {
            tailNode.setNextSibling(paramNode);
        }
        tailNode = paramNode;
    }

    liftedFunctionList.add(definitionNode);
}
/**
 * Creates the parameter node for a free variable that becomes an argument of a lifted function.
 * <p>
 * When a function, lambda or case is lifted, the free variables of its defining expression
 * become arguments of the lifted function. If the type checker reports such a variable as an
 * evaluated local variable, the argument is created as a strict (plinged) parameter; otherwise
 * it is created as a lazy parameter.
 * <p>
 * This is a run-time performance optimization. For example, the lifted argument could represent
 * the length of an array (:: Int) and the lifted function could be a tail-recursive local
 * function. In that case the length argument will be passed to successive recursive calls as a
 * primitive "int".
 *
 * @param varName name of the free variable.
 * @return ParseTreeNode parameter node for the lifted function.
 */
private ParseTreeNode getArgVarNode (String varName) {
    final boolean alreadyEvaluated = compiler.getTypeChecker().isEvaluatedLocalVariable(varName);
    final int paramTokenType = alreadyEvaluated
        ? CALTreeParserTokenTypes.STRICT_PARAM
        : CALTreeParserTokenTypes.LAZY_PARAM;
    return new ParseTreeNode(paramTokenType, varName);
}
/**
 * The entry method for actually doing the lambda lifting. This method assumes that
 * the program has been successfully type checked.
 * <p>
 * Every top-level function definition under outerDefnListNode is processed in turn. The
 * functions created by lifting are then linked in as siblings after the last existing
 * definition, so they become additional children of outerDefnListNode. Nothing is done if
 * outerDefnListNode has no children.
 *
 * @param outerDefnListNode root parse tree node of the top level definitions
 * @throws IllegalStateException if the bound variables stack is not empty when a top-level
 *      function definition is about to be processed.
 */
void lift(ParseTreeNode outerDefnListNode) {

    outerDefnListNode.verifyType(CALTreeParserTokenTypes.OUTER_DEFN_LIST);

    if (outerDefnListNode.firstChild() == null) {
        return;
    }

    liftedFunctionList.clear();
    lambdaLiftInfo = null;

    final ArrayStack<String> boundVariablesStack = ArrayStack.make();

    // Per-module statistics; only maintained when DEBUG_INFO is on.
    int nLambdasLifted = 0;
    int nLocalFunctionsLifted = 0;
    int nCasesLifted = 0;
    int nDCSelectionsLifted = 0;

    // Tracks the last definition visited so that the lifted functions can be chained after it.
    ParseTreeNode lastDefnNode = null;

    for (final ParseTreeNode defnNode : outerDefnListNode) {

        lastDefnNode = defnNode;

        if (defnNode.getType() != CALTreeParserTokenTypes.TOP_LEVEL_FUNCTION_DEFN) {
            continue;
        }

        if (!boundVariablesStack.isEmpty()) {
            throw new IllegalStateException("LambdaLifter: Programming error. Non empty bound variables stack.");
        }

        final ParseTreeNode nameNode = defnNode.getChild(2);
        nameNode.verifyType(CALTreeParserTokenTypes.VAR_ID);
        lambdaLiftInfo = new LambdaLiftInfo (nameNode.getText());

        final ParseTreeNode paramListNode = nameNode.nextSibling();

        // A case that occurs immediately after a top level function definition does not need
        // lifting, e.g.
        //   f xs = case e of ...
        //   f xs = let ... in case e of
        final boolean caseNeedsLifting = false;
        liftExpressionPartOfBoundExpr(boundVariablesStack, paramListNode, caseNeedsLifting);

        if (DEBUG_INFO) {
            nLambdasLifted += lambdaLiftInfo.getLambdaLiftCount();
            nLocalFunctionsLifted += lambdaLiftInfo.getLocalFunctionLiftCount();
            nCasesLifted += lambdaLiftInfo.getCaseLiftCount();
            nDCSelectionsLifted += lambdaLiftInfo.getDCSelectionLiftCount();
        }

        lambdaLiftInfo = null;
    }

    final int nLifts = liftedFunctionList.size();

    if (DEBUG_INFO) {
        LambdaLifter.totalLambdaLiftCount += nLambdasLifted;
        LambdaLifter.totalLocalFunctionLiftCount += nLocalFunctionsLifted;
        LambdaLifter.totalCaseLiftCount += nCasesLifted;
        LambdaLifter.totalDCSelectionLiftCount += nDCSelectionsLifted;

        System.out.println("total number of lifts for module " + currentModuleName + " = " + nLifts);
        System.out.println("\tnumber of lambdas lifted = " + nLambdasLifted);
        System.out.println("\tnumber of local functions lifted = " + nLocalFunctionsLifted);
        System.out.println("\tnumber of case expressions lifted = " + nCasesLifted);
        System.out.println("\tnumber of data cons field selections lifted = " + nDCSelectionsLifted);
        System.out.println("\ttotal of lambda definitions lifted = " + LambdaLifter.totalLambdaLiftCount);
        System.out.println("\ttotal of local functions lifted = " + LambdaLifter.totalLocalFunctionLiftCount);
        System.out.println("\ttotal of case expressions lifted = " + LambdaLifter.totalCaseLiftCount);
        System.out.println("\ttotal of data cons field selections lifted = " + LambdaLifter.totalDCSelectionLiftCount);
        System.out.println("");
    }

    // Now add the newly created lifted functions to the original program parse tree, chaining
    // them after the last definition that was already there.
    for (final ParseTreeNode liftedFunctionNode : liftedFunctionList) {
        lastDefnNode.setNextSibling(liftedFunctionNode);
        lastDefnNode = liftedFunctionNode;
    }
}
/**
* Lifts the lambda expressions, local function definitions and case expressions occurring within
* this expression, dispatching on the type of the root node of parseTree.
* <p>
* Let, lambda and case nodes are handed to their dedicated lift methods. For the other composite
* node types this method recurses into the subexpressions. Variables, data constructors and
* literals contain nothing to lift. Any other node type is reported through
* unexpectedParseTreeNode().
*
* Creation date: (2/8/01 10:36:13 AM)
* @param boundVariablesStack ArrayStack of all variable names that are visible at this point of
* parseTree, excluding top-level function names. In other words, this stack may contain the names
* of local or top-level function argument variables, local function names or binder variables in a lambda
* declaration. The same variable name can occur more than once because of scoping.
* @param parseTree expression parse tree
* @param caseNeedsLifting whether a case expression found at this position needs to be lifted.
* The value is passed on unchanged to the branches of an if-then-else, to the single child of a
* one-child application or tuple, to the expression under a type signature and to the 'in' part
* of a let. All other subexpressions (conditions, arguments, list elements, record field values,
* the operand of a field selection) are visited with true.
*/
private void liftExpr(ArrayStack<String> boundVariablesStack, ParseTreeNode parseTree, boolean caseNeedsLifting) {
int nodeType = parseTree.getType();
// Note: some cases leave the switch with 'break' and others with 'return'. Nothing follows the
// switch, so both simply end the method.
switch (nodeType) {
case CALTreeParserTokenTypes.VIRTUAL_LET_NONREC:
case CALTreeParserTokenTypes.VIRTUAL_LET_REC:
{
liftLetExprJohnssonStyle(boundVariablesStack, parseTree, caseNeedsLifting);
break;
}
case CALTreeParserTokenTypes.LAMBDA_DEFN :
liftLambdaExpr (boundVariablesStack, parseTree);
break;
case CALTreeParserTokenTypes.SELECT_DATA_CONSTRUCTOR_FIELD:
{
// Only the expression being selected from (the first child) is visited.
ParseTreeNode exprNode = parseTree.firstChild();
liftExpr(boundVariablesStack, exprNode, true);
break;
}
case CALTreeParserTokenTypes.VIRTUAL_DATA_CONSTRUCTOR_CASE :
case CALTreeParserTokenTypes.VIRTUAL_RECORD_CASE:
case CALTreeParserTokenTypes.VIRTUAL_TUPLE_CASE:
liftCaseExpr (boundVariablesStack, parseTree, caseNeedsLifting);
break;
case CALTreeParserTokenTypes.LITERAL_if :
{
//we do not need to lift cases appearing as the top-level expressions in the true and false parts
//of an if-then-else if the if-then-else occurs in a context where if instead it were a case, then
//it would not need lifting.
//The condition itself is always visited with caseNeedsLifting == true.
ParseTreeNode condExprNode = parseTree.firstChild();
liftExpr(boundVariablesStack, condExprNode, true);
ParseTreeNode ifTrueNode = condExprNode.nextSibling();
liftExpr(boundVariablesStack, ifTrueNode, caseNeedsLifting);
ParseTreeNode ifFalseNode = ifTrueNode.nextSibling();
liftExpr(boundVariablesStack, ifFalseNode, caseNeedsLifting);
return;
}
case CALTreeParserTokenTypes.APPLICATION :
case CALTreeParserTokenTypes.TUPLE_CONSTRUCTOR :
{
// Application:
// Data constructor field selection is parsed as an application, the first child being of type SELECT_DC.._FIELD.
// There may or may not be any arguments. If none, we can just go right to the selection.
// Tuple constructor:
// In the case of one child, we are really dealing with a parenthesized expression.
if (parseTree.hasExactlyOneChild()) {
// Exactly one child: the node is only a wrapper around that child, so the child keeps the
// caseNeedsLifting value of the wrapper.
liftExpr(boundVariablesStack, parseTree.firstChild(), caseNeedsLifting);
return;
} else {
// Not exactly one child: a genuine application or tuple.
// Deliberate fall through to the LIST_CONSTRUCTOR case, which visits every child.
}
}
case CALTreeParserTokenTypes.LIST_CONSTRUCTOR :
{
// Every child is a subexpression in which a case needs lifting.
for (final ParseTreeNode exprNode : parseTree) {
liftExpr(boundVariablesStack, exprNode, true);
}
return;
}
//function names and variables
case CALTreeParserTokenTypes.QUALIFIED_VAR :
//data constructors
case CALTreeParserTokenTypes.QUALIFIED_CONS :
//literals
case CALTreeParserTokenTypes.INTEGER_LITERAL :
case CALTreeParserTokenTypes.FLOAT_LITERAL :
case CALTreeParserTokenTypes.CHAR_LITERAL :
case CALTreeParserTokenTypes.STRING_LITERAL :
// Leaves: nothing to lift.
return;
case CALTreeParserTokenTypes.RECORD_CONSTRUCTOR:
{
// Children: a BASE_RECORD node (with an optional base record expression) followed by a
// FIELD_MODIFICATION_LIST node.
ParseTreeNode baseRecordNode = parseTree.firstChild();
baseRecordNode.verifyType(CALTreeParserTokenTypes.BASE_RECORD);
ParseTreeNode baseRecordExprNode = baseRecordNode.firstChild();
if (baseRecordExprNode != null) {
liftExpr(boundVariablesStack, baseRecordExprNode, true);
}
ParseTreeNode fieldModificationListNode = baseRecordNode.nextSibling();
fieldModificationListNode.verifyType(CALTreeParserTokenTypes.FIELD_MODIFICATION_LIST);
for (final ParseTreeNode fieldModificationNode : fieldModificationListNode) {
fieldModificationNode.verifyType(CALTreeParserTokenTypes.FIELD_EXTENSION,
CALTreeParserTokenTypes.FIELD_VALUE_UPDATE);
// The value expression is the second child of the field modification node.
ParseTreeNode valueExprNode = fieldModificationNode.getChild(1);
liftExpr(boundVariablesStack, valueExprNode, true);
}
return;
}
case CALTreeParserTokenTypes.SELECT_RECORD_FIELD:
{
// Only the expression being selected from (the first child) is visited.
ParseTreeNode exprNode = parseTree.firstChild();
liftExpr(boundVariablesStack, exprNode, true);
return;
}
case CALTreeParserTokenTypes.EXPRESSION_TYPE_SIGNATURE:
{
//in the definition
//x = (case ...) :: Int
//the case does not need lifting since the type signature does not have an
//operational effect at runtime.
ParseTreeNode exprNode = parseTree.firstChild();
liftExpr(boundVariablesStack, exprNode, caseNeedsLifting);
return;
}
//these operators should be replaced by their functional forms by this point.
//They are listed explicitly for documentation and share the handling of 'default'.
case CALTreeParserTokenTypes.BARBAR :
case CALTreeParserTokenTypes.AMPERSANDAMPERSAND :
case CALTreeParserTokenTypes.PLUSPLUS :
case CALTreeParserTokenTypes.LESS_THAN :
case CALTreeParserTokenTypes.LESS_THAN_OR_EQUALS :
case CALTreeParserTokenTypes.EQUALSEQUALS :
case CALTreeParserTokenTypes.NOT_EQUALS :
case CALTreeParserTokenTypes.GREATER_THAN_OR_EQUALS :
case CALTreeParserTokenTypes.GREATER_THAN :
case CALTreeParserTokenTypes.PLUS :
case CALTreeParserTokenTypes.MINUS :
case CALTreeParserTokenTypes.ASTERISK :
case CALTreeParserTokenTypes.SOLIDUS :
case CALTreeParserTokenTypes.PERCENT:
case CALTreeParserTokenTypes.COLON :
case CALTreeParserTokenTypes.UNARY_MINUS:
case CALTreeParserTokenTypes.POUND:
case CALTreeParserTokenTypes.DOLLAR:
case CALTreeParserTokenTypes.BACKQUOTE:
default :
{
parseTree.unexpectedParseTreeNode();
return;
}
}
}
/**
* Lifts (let f1 x1 = e1; f2 x2 = e2; ... fn xn = en) in e.
* The reason this is needed is that most machines cannot handle having arguments to
* local functions.
* <p>
* The details of Johnsson-style lambda lifting are described in Peyton Jones and Lester's book
* on Implementing Functional Programming Languages pg.239.
* <p>
* Johnsson style lambda lifting is a special way of lifting local functions so that:
* <ol>
* <li> in the case of tail-recursive local functions, the lifted functions are indeed
* tail recursive. This lets the runtime take advantage of the tail recursion optimization
* which implements a tail recursive call as a goto.
* <li> even for non-recursive local function definitions, the call to the lifted function in the
* "in" part of the let is a direct call to a named function, which is more efficient
* in lecc.
* </ol>
* <p>
* The let node is modified in place: positive arity definitions are moved to the list of lifted
* functions, and if no zero arity definitions remain the let node itself is turned into an
* application node wrapping the 'in' expression.
*
* @param boundVariablesStack names of the variables visible at this let. The names of the zero
* arity definitions of this let are pushed while the let is processed and popped again before
* this method returns.
* @param letExprNode the let node, of type VIRTUAL_LET_NONREC or VIRTUAL_LET_REC.
* @param caseNeedsLifting passed on unchanged when lifting within the 'in' part of the let.
*/
private void liftLetExprJohnssonStyle(ArrayStack<String> boundVariablesStack, ParseTreeNode letExprNode, boolean caseNeedsLifting) {
//Here is an example of Johnsson style lambda lifting showing the main steps.
//Note for all this to work without causing naming clashes we assume that all names are globally distinct.
//Otherwise the lifted functions can clash with top-level names, and more subtly, added argument variables
//can clash with existing argument variable names.
//
//1. find the free variables in the let block excluding the functions to be lifted. These are x, y and k below.
//2. add these free variables as extra arguments to each of the functions defined in the let
// block. For example, we add to g and h but not to k, since k has 0 arguments and will not be lifted.
//3. replace all occurrences of the lifted functions in the 'in' part of the let, and in each of the defining
// expressions (including for the 0 argument bindings like k) with the argument augmented versions
// e.g. g is replaced by (g x y k).
//4. remove the functions to be lifted to the top level. This may get rid of the let block entirely if there
// are no 0 argument assignments in the block.
//
//f x y =
// let
// g p = h p + x;
// h q = k + y + q;
// k = g y;
// in
// g 4.0;
//
//after Johnsson style lambda lifting this is:
//
//f x y =
// let
// k = g x y k y;
// in
// g x y k 4.0;
//
//g x y k p = h x y k p + x;
//h x y k q = k + y + q;
letExprNode.verifyType(CALTreeParserTokenTypes.VIRTUAL_LET_NONREC, CALTreeParserTokenTypes.VIRTUAL_LET_REC);
//In the expression
//let f1 xs1 = e1; f2 xs2 = e2; ... in e
//First add those fi that have zero arity to the bound variables stack.
ParseTreeNode defnListNode = letExprNode.firstChild();
defnListNode.verifyType(CALTreeParserTokenTypes.LET_DEFN_LIST);
//the number of names pushed here; the same number is popped once lifting within this let is done.
int nLocalZeroAritySCs = 0;
for (final ParseTreeNode defnNode : defnListNode) {
//children of the definition list that are not LET_DEFN nodes (the type declaration nodes) are skipped.
if (defnNode.getType() == CALTreeParserTokenTypes.LET_DEFN &&
!hasPositiveArity(defnNode)) {
++nLocalZeroAritySCs;
ParseTreeNode optionalCALDocNode = defnNode.firstChild();
optionalCALDocNode.verifyType(CALTreeParserTokenTypes.OPTIONAL_CALDOC_COMMENT);
ParseTreeNode localFunctionNameNode = optionalCALDocNode.nextSibling();
localFunctionNameNode.verifyType(CALTreeParserTokenTypes.VAR_ID);
// functionName is now a bound variable for the other declarations in the 'let' and for the expression
// following the 'in'.
boundVariablesStack.push(localFunctionNameNode.getText());
}
}
//Iterate over all the local functions (zero and positive arity) finding the free variables in
//their defining expressions.
//A LinkedHashSet is used, so the extra arguments are later added in the order in which the
//free variables were first encountered.
Set<String> freeVariablesSet = new LinkedHashSet<String>();
for (final ParseTreeNode defnNode : defnListNode) {
if (defnNode.getType() == CALTreeParserTokenTypes.LET_DEFN) {
ParseTreeNode optionalCALDocNode = defnNode.firstChild();
optionalCALDocNode.verifyType(CALTreeParserTokenTypes.OPTIONAL_CALDOC_COMMENT);
ParseTreeNode localFunctionNameNode = optionalCALDocNode.nextSibling();
ParseTreeNode varListNode = localFunctionNameNode.nextSibling();
freeVariablesSet.addAll(freeVariableFinder.findFreeNamesInLambdaExpr(boundVariablesStack, varListNode));
}
}
//Augment the arguments of the positive arity functions by the free variables set, and add to the list of lifted functions.
freeVariablesSet = Collections.unmodifiableSet(freeVariablesSet); //for safety at this point...
Set<String> positiveArityFunctionNamesSet = new LinkedHashSet<String>();
for (final ParseTreeNode defnNode : defnListNode) {
if (defnNode.getType() == CALTreeParserTokenTypes.LET_DEFN &&
hasPositiveArity (defnNode)) {
ParseTreeNode optionalCALDocNode = defnNode.firstChild();
optionalCALDocNode.verifyType(CALTreeParserTokenTypes.OPTIONAL_CALDOC_COMMENT);
ParseTreeNode localSCNameNode = optionalCALDocNode.nextSibling();
localSCNameNode.verifyType(CALTreeParserTokenTypes.VAR_ID);
String localSCName = localSCNameNode.getText();
ParseTreeNode varListNode = localSCNameNode.nextSibling();
//the lifted top-level function keeps the name of the local function.
addLiftedFunction (localSCName, freeVariablesSet, varListNode);
positiveArityFunctionNamesSet.add(localSCName);
}
}
final int nFunctionsToLift = positiveArityFunctionNamesSet.size();
if (nFunctionsToLift > 0) {
//Update the applications occurring in the defining expressions
//e.g. f x y = ... f ... with free variables u v , then this stage will replace the ... f ... with ... (f u v) ...
//we don't need to do this for non-recursive lets, since the lifted functions are guaranteed not to occur
//in their defining expressions by definition of being a non-recursive let!
if (letExprNode.getType() == CALTreeParserTokenTypes.VIRTUAL_LET_REC) {
for (final ParseTreeNode defnNode : defnListNode) {
if (defnNode.getType() == CALTreeParserTokenTypes.LET_DEFN) {
//the defining expression is the fourth child of a LET_DEFN node.
ParseTreeNode definingExpressionNode = defnNode.getChild(3);
updateApplications(positiveArityFunctionNamesSet, freeVariablesSet, definingExpressionNode);
}
}
}
//update the applications occurring in the 'in' part of the let.
ParseTreeNode inPartNode = letExprNode.getChild(1);
updateApplications(positiveArityFunctionNamesSet, freeVariablesSet, inPartNode);
}
//now recursively lift within the defining expressions of each local function
//an interesting point is whether we should recursively lift *before* or *after* doing the lift
//of the current set of bindings (we are doing after). I think both work, but after is more consistent
//with the Johnsson style in that a lifted local function used in some inner scope will always refer
//to the top-level lifted function, and never a variable passed as an argument.
for (final ParseTreeNode defnNode : defnListNode) {
if (defnNode.getType() == CALTreeParserTokenTypes.LET_DEFN) {
ParseTreeNode optionalCALDocNode = defnNode.firstChild();
optionalCALDocNode.verifyType(CALTreeParserTokenTypes.OPTIONAL_CALDOC_COMMENT);
ParseTreeNode localFunctionNameNode = optionalCALDocNode.nextSibling();
ParseTreeNode varListNode = localFunctionNameNode.nextSibling();
//we do not need to lift a case that appears immediately after a local function that is itself lifted.
liftExpressionPartOfBoundExpr(boundVariablesStack, varListNode, !hasPositiveArity(defnNode));
}
}
//If "let f1 xs1 = e1; f2 xs2 = e2; ... in e", now lift any lambdas or local function definitions in e.
ParseTreeNode inPartNode = letExprNode.getChild(1);
liftExpr(boundVariablesStack, inPartNode, caseNeedsLifting);
//restore the stack before any of the returns below, so that every exit leaves it balanced.
boundVariablesStack.popN(nLocalZeroAritySCs);
//finally remove the lifted functions from the let defn list. If everything is removed, so there is
//effectively zero local bindings defined, then just replace with an application node.
if (nFunctionsToLift == 0) {
//nothing was lifted, so the let is left as it is.
return;
}
if (nLocalZeroAritySCs == 0) {
//lift everything so that "let f1 xs1 = e1; f2 xs2 = e2; ... in e" is
//changed to "e".
//The let node object is reused: it becomes an application node whose only child is e.
letExprNode.setType(CALTreeParserTokenTypes.APPLICATION);
letExprNode.setText("@");
letExprNode.setFirstChild(inPartNode);
return;
}
//Mixed case: keep the let, but with only its zero arity definitions.
List<ParseTreeNode> defnNodesNotToLift = new ArrayList<ParseTreeNode>();
for (final ParseTreeNode defnNode : defnListNode) {
//drop the type declaration nodes (not needed anymore) and the positive arity functions
if (defnNode.getType() == CALTreeParserTokenTypes.LET_DEFN && !hasPositiveArity(defnNode)) {
defnNodesNotToLift.add(defnNode);
}
}
defnListNode.removeChildren();
//the array is not empty here, since nLocalZeroAritySCs > 0.
ParseTreeNode[] remainingDefnNodes = defnNodesNotToLift.toArray(new ParseTreeNode[]{});
//cut the old sibling link of the last kept definition before the kept definitions are re-added.
remainingDefnNodes[remainingDefnNodes.length - 1].setNextSibling(null);
defnListNode.addChildren(remainingDefnNodes);
}
/**
* Rewrites, in place, the references to lifted functions that occur within parseTree so that they
* are applied to the extra arguments. A QUALIFIED_VAR node that belongs to the current module and
* whose name is in liftedFunctionsSet is turned into an application node whose children are the
* function followed by one variable per name in extraArgumentsSet, e.g. g becomes (g x y k).
* All other node types are traversed recursively; a node type that is not expected here is
* reported through unexpectedParseTreeNode().
*
* @param liftedFunctionsSet (String Set) names of the functions that need extra arguments added to them when
* used in an expression
* @param extraArgumentsSet (String Set) the arguments that need to be added to each of the function applications
* involving a function from the liftedFunctionsSet
* @param parseTree the expression to update.
*/
private void updateApplications(Set<String> liftedFunctionsSet, Set<String> extraArgumentsSet, ParseTreeNode parseTree) {
int nodeType = parseTree.getType();
// Note: nothing follows the switch, so the one case that leaves it with 'break' behaves like
// the cases that 'return'.
switch (nodeType) {
case CALTreeParserTokenTypes.VIRTUAL_LET_NONREC:
case CALTreeParserTokenTypes.VIRTUAL_LET_REC:
{
// Visit the defining expression (fourth child) of every local definition, then the 'in' part.
ParseTreeNode defnListNode = parseTree.firstChild();
defnListNode.verifyType(CALTreeParserTokenTypes.LET_DEFN_LIST);
for (final ParseTreeNode defnNode : defnListNode) {
if (defnNode.getType() == CALTreeParserTokenTypes.LET_DEFN) {
ParseTreeNode definingExpressionNode = defnNode.getChild(3);
updateApplications(liftedFunctionsSet, extraArgumentsSet, definingExpressionNode);
}
}
ParseTreeNode inPartNode = parseTree.getChild(1);
updateApplications(liftedFunctionsSet, extraArgumentsSet, inPartNode);
return;
}
case CALTreeParserTokenTypes.LAMBDA_DEFN :
// Only the second child is visited (presumably the body that follows the parameter list).
updateApplications(liftedFunctionsSet, extraArgumentsSet, parseTree.getChild(1));
return;
case CALTreeParserTokenTypes.SELECT_DATA_CONSTRUCTOR_FIELD :
{
ParseTreeNode exprNode = parseTree.firstChild();
updateApplications(liftedFunctionsSet, extraArgumentsSet, exprNode);
break;
}
case CALTreeParserTokenTypes.VIRTUAL_DATA_CONSTRUCTOR_CASE :
case CALTreeParserTokenTypes.VIRTUAL_RECORD_CASE:
case CALTreeParserTokenTypes.VIRTUAL_TUPLE_CASE:
{
// Visit the expression being examined, then the right hand side of every alternative.
ParseTreeNode exprNode = parseTree.firstChild();
updateApplications(liftedFunctionsSet, extraArgumentsSet, exprNode);
ParseTreeNode altListNode = exprNode.nextSibling();
altListNode.verifyType(CALTreeParserTokenTypes.ALT_LIST);
for (final ParseTreeNode altNode : altListNode) {
altNode.verifyType(CALTreeParserTokenTypes.ALT);
ParseTreeNode patternNode = altNode.firstChild();
ParseTreeNode rhsExprNode = patternNode.nextSibling();
// The pattern itself is not rewritten; its type is only checked against the known pattern kinds.
switch (patternNode.getType()) {
case CALTreeParserTokenTypes.PATTERN_CONSTRUCTOR:
case CALTreeParserTokenTypes.TUPLE_CONSTRUCTOR:
case CALTreeParserTokenTypes.VIRTUAL_UNIT_DATA_CONSTRUCTOR:
case CALTreeParserTokenTypes.LIST_CONSTRUCTOR:
case CALTreeParserTokenTypes.INT_PATTERN:
case CALTreeParserTokenTypes.CHAR_PATTERN:
case CALTreeParserTokenTypes.COLON:
case CALTreeParserTokenTypes.UNDERSCORE:
case CALTreeParserTokenTypes.RECORD_PATTERN:
{
updateApplications(liftedFunctionsSet, extraArgumentsSet, rhsExprNode);
break;
}
default:
{
patternNode.unexpectedParseTreeNode();
return;
}
}
}
return;
}
case CALTreeParserTokenTypes.LITERAL_if :
{
ParseTreeNode condExprNode = parseTree.firstChild();
updateApplications(liftedFunctionsSet, extraArgumentsSet, condExprNode);
ParseTreeNode ifTrueNode = condExprNode.nextSibling();
updateApplications(liftedFunctionsSet, extraArgumentsSet, ifTrueNode);
ParseTreeNode ifFalseNode = ifTrueNode.nextSibling();
updateApplications(liftedFunctionsSet, extraArgumentsSet, ifFalseNode);
return;
}
case CALTreeParserTokenTypes.APPLICATION :
case CALTreeParserTokenTypes.TUPLE_CONSTRUCTOR :
case CALTreeParserTokenTypes.LIST_CONSTRUCTOR :
{
// Every child is a subexpression.
for (final ParseTreeNode exprNode : parseTree) {
updateApplications(liftedFunctionsSet, extraArgumentsSet, exprNode);
}
return;
}
case CALTreeParserTokenTypes.QUALIFIED_VAR :
{
// Children: the module name node followed by the name node.
ParseTreeNode moduleNameNode = parseTree.firstChild();
ModuleName moduleName = ModuleNameUtilities.getModuleNameFromParseTree(moduleNameNode);
if (!currentModuleName.equals(moduleName)) {
//local symbols will all belong to the current module
return;
}
ParseTreeNode nameNode = moduleNameNode.nextSibling();
String functionName = nameNode.getText();
if (liftedFunctionsSet.contains(functionName)) {
//add arguments to this function application
//The existing node object is reused as the application node. Its first child becomes a
//newly made qualified variable node for the function, followed by one qualified variable
//node per extra argument, in the iteration order of extraArgumentsSet.
parseTree.setType(CALTreeParserTokenTypes.APPLICATION);
parseTree.setText("@");
parseTree.setFirstChild(ParseTreeNode.makeQualifiedVarNode(moduleName, functionName, null));
for (final String extraArgName : extraArgumentsSet) {
parseTree.addChild(ParseTreeNode.makeQualifiedVarNode(moduleName, extraArgName, null));
}
}
return;
}
//data constructors
case CALTreeParserTokenTypes.QUALIFIED_CONS :
//literals
case CALTreeParserTokenTypes.INTEGER_LITERAL :
case CALTreeParserTokenTypes.FLOAT_LITERAL :
case CALTreeParserTokenTypes.CHAR_LITERAL :
case CALTreeParserTokenTypes.STRING_LITERAL :
// Leaves: nothing to update.
return;
case CALTreeParserTokenTypes.RECORD_CONSTRUCTOR:
{
// Children: a BASE_RECORD node (with an optional base record expression) followed by a
// FIELD_MODIFICATION_LIST node.
ParseTreeNode baseRecordNode = parseTree.firstChild();
baseRecordNode.verifyType(CALTreeParserTokenTypes.BASE_RECORD);
ParseTreeNode baseRecordExprNode = baseRecordNode.firstChild();
if (baseRecordExprNode != null) {
updateApplications(liftedFunctionsSet, extraArgumentsSet, baseRecordExprNode);
}
ParseTreeNode fieldModificationListNode = baseRecordNode.nextSibling();
fieldModificationListNode.verifyType(CALTreeParserTokenTypes.FIELD_MODIFICATION_LIST);
for (final ParseTreeNode fieldModificationNode : fieldModificationListNode) {
fieldModificationNode.verifyType(CALTreeParserTokenTypes.FIELD_EXTENSION,
CALTreeParserTokenTypes.FIELD_VALUE_UPDATE);
// The value expression is the second child of the field modification node.
ParseTreeNode valueExprNode = fieldModificationNode.getChild(1);
updateApplications(liftedFunctionsSet, extraArgumentsSet, valueExprNode);
}
return;
}
case CALTreeParserTokenTypes.SELECT_RECORD_FIELD:
case CALTreeParserTokenTypes.EXPRESSION_TYPE_SIGNATURE:
{
// In both cases only the first child, the expression, is visited.
ParseTreeNode exprNode = parseTree.firstChild();
updateApplications(liftedFunctionsSet, extraArgumentsSet, exprNode);
return;
}
//these operators should be replaced by their functional forms by this point.
//They are listed explicitly for documentation and share the handling of 'default'.
case CALTreeParserTokenTypes.BARBAR :
case CALTreeParserTokenTypes.AMPERSANDAMPERSAND :
case CALTreeParserTokenTypes.PLUSPLUS :
case CALTreeParserTokenTypes.LESS_THAN :
case CALTreeParserTokenTypes.LESS_THAN_OR_EQUALS :
case CALTreeParserTokenTypes.EQUALSEQUALS :
case CALTreeParserTokenTypes.NOT_EQUALS :
case CALTreeParserTokenTypes.GREATER_THAN_OR_EQUALS :
case CALTreeParserTokenTypes.GREATER_THAN :
case CALTreeParserTokenTypes.PLUS :
case CALTreeParserTokenTypes.MINUS :
case CALTreeParserTokenTypes.ASTERISK :
case CALTreeParserTokenTypes.SOLIDUS :
case CALTreeParserTokenTypes.PERCENT:
case CALTreeParserTokenTypes.COLON :
case CALTreeParserTokenTypes.UNARY_MINUS:
case CALTreeParserTokenTypes.POUND:
case CALTreeParserTokenTypes.DOLLAR:
case CALTreeParserTokenTypes.BACKQUOTE:
default :
{
parseTree.unexpectedParseTreeNode();
return;
}
}
}
/**
 * Tells whether a local definition declares at least one parameter.
 * <p>
 * The children of a LET_DEFN node are read as: optional CALDoc comment, name, parameter list.
 * The definition has positive arity when its parameter list has at least one child.
 *
 * @param defnNode a node of type LET_DEFN.
 * @return true if defn node is a local function of positive arity (and thus will be lifted).
 */
private static boolean hasPositiveArity(ParseTreeNode defnNode) {
    defnNode.verifyType(CALTreeParserTokenTypes.LET_DEFN);

    final ParseTreeNode calDocNode = defnNode.firstChild();
    calDocNode.verifyType(CALTreeParserTokenTypes.OPTIONAL_CALDOC_COMMENT);

    // Step over the name node to reach the parameter list.
    final ParseTreeNode nameNode = calDocNode.nextSibling();
    final ParseTreeNode paramsNode = nameNode.nextSibling();

    final boolean hasNoParameters = paramsNode.firstChild() == null;
    return !hasNoParameters;
}
/**
 * Lifts \x1 ... xn -> e: the lambda is turned into a new lifted function and the lambda node is
 * rewritten, in place, into an application of that function to the lambda's free variables.
 * The reason this is needed is that most machines cannot evaluate lambda expressions
 * directly. Instead, these must be converted to functions.
 *
 * @param boundVariablesStack stack of currently bound variable names; the lambda's own variables are
 *        pushed while its body is processed and popped again afterwards.
 * @param lambdaExprNode a LAMBDA_DEFN node. On return it has been re-initialized as an APPLICATION node.
 */
private void liftLambdaExpr(ArrayStack<String> boundVariablesStack, ParseTreeNode lambdaExprNode) {
    lambdaExprNode.verifyType(CALTreeParserTokenTypes.LAMBDA_DEFN);
    final ParseTreeNode paramsNode = lambdaExprNode.firstChild();

    // Lift inside the body e first. A case does not need lifting in the situations
    //   \xs -> case e of
    //   \xs -> let ... in case e of
    // because the lambda itself is lifted, so the case ends up as a top level case
    // of the lifted lambda function. Hence 'false' for the caseNeedsLifting argument.
    liftExpressionPartOfBoundExpr(boundVariablesStack, paramsNode, false);

    // The free variables the lambda depends on; a subset of the names in boundVariablesStack.
    final Set<String> capturedNames = freeVariableFinder.findFreeNamesInLambdaExpr(boundVariablesStack, paramsNode);

    // For \x1 ... xn -> e depending on free variables y1 ... ym, create the new global function
    //   $lambda y1 ... ym x1 ... xn = e
    final String liftedName = lambdaLiftInfo.getNextLiftedLambdaName();
    addLiftedFunction(liftedName, capturedNames, paramsNode);

    // Finally turn the lambda node itself into the application
    //   $lambda y1 ... ym
    lambdaExprNode.removeChildren();
    lambdaExprNode.initialize(CALTreeParserTokenTypes.APPLICATION, "APPLICATION");
    makeApplication(lambdaExprNode, liftedName, capturedNames);
}
/**
 * Lifts case e of ... so that the case is the immediate right hand side of
 * a function.
 * The reason this is needed is that most machines cannot evaluate the expression 'e' lazily without
 * this change. Conceptually, it is because there is no place to store the unevaluated e.
 *
 * Processing order: the scrutinee expression is lifted first (always with caseNeedsLifting = true),
 * then the result expression of every alternative with that alternative's pattern variables in scope,
 * and finally, if LIFT_CASES and caseNeedsLifting both hold, the case expression itself is lifted.
 *
 * NOTE(review): earlier documentation of this method stated that data constructor unpacking using field
 * name-based argument extraction (matching notation) is converted to positional notation, and that case alt
 * patterns for pattern groups are converted to the corresponding series of pattern constructors. Neither
 * conversion is visible in this method's own statements - confirm where (or whether) they happen.
 *
 * @param boundVariablesStack stack of currently bound variable names; left balanced on normal return.
 * @param caseExprNode a VIRTUAL_DATA_CONSTRUCTOR_CASE, VIRTUAL_RECORD_CASE or VIRTUAL_TUPLE_CASE node.
 * @param caseNeedsLifting whether this case expression must itself be lifted to a new function.
 */
private void liftCaseExpr(ArrayStack<String> boundVariablesStack, ParseTreeNode caseExprNode, boolean caseNeedsLifting) {
    caseExprNode.verifyType(CALTreeParserTokenTypes.VIRTUAL_DATA_CONSTRUCTOR_CASE,
                            CALTreeParserTokenTypes.VIRTUAL_RECORD_CASE,
                            CALTreeParserTokenTypes.VIRTUAL_TUPLE_CASE);

    final ParseTreeNode scrutineeNode = caseExprNode.firstChild();
    liftExpr(boundVariablesStack, scrutineeNode, true);

    final ParseTreeNode alternativesNode = scrutineeNode.nextSibling();
    alternativesNode.verifyType(CALTreeParserTokenTypes.ALT_LIST);

    // A case occurring immediately after a pattern does not need lifting,
    // e.g. in  case e of Just x -> case ...  the second case does not need lifting.
    final boolean resultExprCaseNeedsLifting = false;

    for (final ParseTreeNode alternative : alternativesNode) {
        alternative.verifyType(CALTreeParserTokenTypes.ALT);
        final ParseTreeNode pattern = alternative.firstChild();

        switch (pattern.getType()) {

            case CALTreeParserTokenTypes.PATTERN_CONSTRUCTOR:
            {
                final ParseTreeNode argBindings = pattern.getChild(1);
                final ParseTreeNode altBody = pattern.nextSibling();
                final int bindingsKind = argBindings.getType();

                if (bindingsKind == CALTreeParserTokenTypes.PATTERN_VAR_LIST) {
                    // Positional notation.
                    liftExpressionPartOfBoundExpr(boundVariablesStack, argBindings, altBody, resultExprCaseNeedsLifting);

                } else if (bindingsKind == CALTreeParserTokenTypes.FIELD_BINDING_VAR_ASSIGNMENT_LIST) {
                    // Matching notation.
                    // NOTE(review): this branch forwards caseNeedsLifting rather than
                    // resultExprCaseNeedsLifting, unlike the positional branch - confirm this is intended.
                    final int pushed = pushVarNamesFromBindings(boundVariablesStack, argBindings);
                    liftExpr(boundVariablesStack, altBody, caseNeedsLifting);
                    boundVariablesStack.popN(pushed);

                } else {
                    argBindings.unexpectedParseTreeNode();
                    return;
                }
                break;
            }

            case CALTreeParserTokenTypes.TUPLE_CONSTRUCTOR:
            case CALTreeParserTokenTypes.VIRTUAL_UNIT_DATA_CONSTRUCTOR:
            case CALTreeParserTokenTypes.LIST_CONSTRUCTOR:
            case CALTreeParserTokenTypes.COLON:
            case CALTreeParserTokenTypes.UNDERSCORE:
            {
                // The pattern node is itself the parent of the pattern variables.
                liftExpressionPartOfBoundExpr(boundVariablesStack, pattern, resultExprCaseNeedsLifting);
                break;
            }

            case CALTreeParserTokenTypes.INT_PATTERN:
            case CALTreeParserTokenTypes.CHAR_PATTERN:
            {
                // No variables are pushed for these patterns.
                // NOTE(review): caseNeedsLifting (not resultExprCaseNeedsLifting) is forwarded here too - confirm.
                liftExpr(boundVariablesStack, pattern.nextSibling(), caseNeedsLifting);
                break;
            }

            case CALTreeParserTokenTypes.RECORD_PATTERN:
            {
                final ParseTreeNode baseRecordPattern = pattern.firstChild();
                baseRecordPattern.verifyType(CALTreeParserTokenTypes.BASE_RECORD_PATTERN);

                // The base record pattern variable is optional, and is not counted if it is an underscore.
                final ParseTreeNode baseRecordVar = baseRecordPattern.firstChild();
                int pushed = 0;
                if (baseRecordVar != null && pushVarName(boundVariablesStack, baseRecordVar)) {
                    pushed = 1;
                }

                pushed += pushVarNamesFromBindings(boundVariablesStack, baseRecordPattern.nextSibling());

                liftExpr(boundVariablesStack, pattern.nextSibling(), resultExprCaseNeedsLifting);
                boundVariablesStack.popN(pushed);
                break;
            }

            default:
            {
                pattern.unexpectedParseTreeNode();
                return;
            }
        }
    }

    // There is no point in lifting a case if it occurs in an expression of the form
    //   f xs = case ...
    // i.e. if it is at the immediate right hand side of a top level function anyways; it is lifted already.
    // There are a few other such situations, documented where the caseNeedsLifting flag is set.
    if (LIFT_CASES && caseNeedsLifting) {
        // The free variables the case expression depends on; a subset of the names in boundVariablesStack.
        final Set<String> capturedNames = freeVariableFinder.findFreeNamesInExpr(boundVariablesStack, caseExprNode);
        final String liftedName = lambdaLiftInfo.getNextLiftedCaseName();
        liftCaseToFunction(liftedName, caseExprNode, capturedNames);
    }
}
/**
 * Does the actual work of lifting a case or data constructor field selection to a new function,
 * and of replacing the expression with a call to that function.
 *
 * @param liftedSCName the name of the sc to create.
 * @param caseLikeExprNode a case expression or a field selection node. Its contents are overwritten
 *        with the application of the new function; its next sibling link is kept.
 * @param freeVariablesSet the free variables on which the case expression depends.
 */
private void liftCaseToFunction(String liftedSCName, ParseTreeNode caseLikeExprNode, Set<String> freeVariablesSet) {

    // Build (FUNCTION_PARAM_LIST, body) for the new function. The parameter list starts out empty.
    final ParseTreeNode paramsNode = new ParseTreeNode(CALTreeParserTokenTypes.FUNCTION_PARAM_LIST, "FUNCTION_PARAM_LIST");
    paramsNode.setTypeExprForFunctionParamList(caseLikeExprNode.getTypeExprForCaseExpr());

    // The body is a copy of the original expression node, detached from the original's siblings.
    // (presumably copyContentsFrom also copies the sibling link, hence the explicit reset - confirm)
    final ParseTreeNode bodyNode = new ParseTreeNode();
    bodyNode.copyContentsFrom(caseLikeExprNode);
    bodyNode.setNextSibling(null);
    paramsNode.setNextSibling(bodyNode);

    addLiftedFunction(liftedSCName, freeVariablesSet, paramsNode);

    // Overwrite the original expression in the program parse tree with a call to the function just
    // created, making sure the original node keeps its place in its sibling chain.
    final ParseTreeNode callNode = new ParseTreeNode(CALTreeParserTokenTypes.APPLICATION, "@");
    makeApplication(callNode, liftedSCName, freeVariablesSet);

    final ParseTreeNode savedSibling = caseLikeExprNode.nextSibling();
    caseLikeExprNode.copyContentsFrom(callNode);
    caseLikeExprNode.setNextSibling(savedSibling);
}
/**
 * A helper function that lifts any lambdas or local function defns found in the expression e where
 * e is part of a parseTree (varListRoot (x1 x2 ... xn) e).
 * Note that generalizedVarListNode can be a VAR_LIST, TUPLE_CONSTRUCTOR, COLON, etc node. The key property is
 * that it is the parent node of a list of vars, and its next sibling is a bound expression.
 *
 * Creation date: (2/7/01 2:25:31 PM)
 * @param boundVariablesStack stack of currently bound variable names.
 * @param generalizedVarListNode parent node of the variables; its next sibling is taken as the bound expression.
 * @param caseNeedsLifting passed on unchanged for the processing of the bound expression.
 */
private void liftExpressionPartOfBoundExpr(ArrayStack<String> boundVariablesStack, ParseTreeNode generalizedVarListNode, boolean caseNeedsLifting) {
    final ParseTreeNode boundExprNode = generalizedVarListNode.nextSibling();
    liftExpressionPartOfBoundExpr(boundVariablesStack, generalizedVarListNode, boundExprNode, caseNeedsLifting);
}
/**
 * Lifts within boundExprNode while the variables found under generalizedVarListNode are in scope:
 * their names are pushed onto the stack, the bound expression is processed, and the same number
 * of names is popped again.
 *
 * @param boundVariablesStack stack of currently bound variable names.
 * @param generalizedVarListNode parent node of the variables bound over boundExprNode.
 * @param boundExprNode the expression in which to lift.
 * @param caseNeedsLifting passed on unchanged to liftExpr for boundExprNode.
 */
private void liftExpressionPartOfBoundExpr(ArrayStack<String> boundVariablesStack, ParseTreeNode generalizedVarListNode, ParseTreeNode boundExprNode, boolean caseNeedsLifting) {
    final int pushedCount = pushVarNames(boundVariablesStack, generalizedVarListNode);

    liftExpr(boundVariablesStack, boundExprNode, caseNeedsLifting);

    // Underscore patterns push nothing, so pop exactly what was pushed.
    boundVariablesStack.popN(pushedCount);
}
/**
 * A helper method that adds the required child nodes to applicationNode so that
 * it becomes the parse tree node corresponding to the application
 * of the function named functionName to the variables in freeVariablesSet.
 * The arguments are appended in the iteration order of freeVariablesSet.
 *
 * Creation date: (2/8/01 9:52:05 AM)
 * @param applicationNode an APPLICATION node; its first child is replaced.
 * @param functionName an unqualified name
 * @param freeVariablesSet the names of the variables to which the function is applied.
 */
private void makeApplication(ParseTreeNode applicationNode, String functionName, Set<String> freeVariablesSet) {
    applicationNode.verifyType(CALTreeParserTokenTypes.APPLICATION);

    // 'tail' is always the last node of the sibling chain built so far, starting with the function itself.
    ParseTreeNode tail = ParseTreeNode.makeQualifiedVarNode(currentModuleName, functionName, null);
    applicationNode.setFirstChild(tail);

    for (final Iterator<String> names = freeVariablesSet.iterator(); names.hasNext(); ) {
        final ParseTreeNode argNode = ParseTreeNode.makeQualifiedVarNode(currentModuleName, names.next(), null);
        tail.setNextSibling(argNode);
        tail = argNode;
    }
}
/**
 * Push the variable name for this node onto the stack.
 * A VAR_ID, LAZY_PARAM or STRICT_PARAM node contributes its text as the variable name;
 * an UNDERSCORE node contributes nothing.
 *
 * @param stack the stack to push the name onto.
 * @param patternVarNode a parse tree node representing a pattern variable or parameter.
 * @return whether a variable name was pushed. False if an underscore, true otherwise.
 * @throws IllegalArgumentException if patternVarNode is of any other node type.
 */
private static boolean pushVarName(final ArrayStack<String> stack, final ParseTreeNode patternVarNode) {
    switch (patternVarNode.getType())
    {
        case CALTreeParserTokenTypes.VAR_ID:
        case CALTreeParserTokenTypes.LAZY_PARAM:
        case CALTreeParserTokenTypes.STRICT_PARAM:
        {
            stack.push(patternVarNode.getText());
            return true;
        }

        case CALTreeParserTokenTypes.UNDERSCORE:
            return false;

        default:
        {
            // Identify the offending node so that the failure can be diagnosed from the message alone.
            throw new IllegalArgumentException(
                "Unexpected parse tree node for a pattern variable: type " + patternVarNode.getType()
                + ", text '" + patternVarNode.getText() + "'.");
        }
    }
}
/**
 * Push the variable names that are child nodes of generalizedVarListNode onto the stack.
 *
 * @param stack the stack to push the names onto.
 * @param generalizedVarListNode the parent node of the variable nodes.
 * @return int the number of variable names pushed (the number of child nodes minus the number of _ pattern variables)
 */
private static int pushVarNames(final ArrayStack<String> stack, final ParseTreeNode generalizedVarListNode) {
    int pushedCount = 0;

    for (final ParseTreeNode childNode : generalizedVarListNode) {
        // pushVarName reports false for an underscore, which pushes nothing.
        pushedCount += pushVarName(stack, childNode) ? 1 : 0;
    }

    return pushedCount;
}
/**
 * Push onto the stack the variable names that are bound to the field names in the field bindings list.
 *
 * @param stack the stack to push the names onto.
 * @param fieldBindingVarAssignmentListNode a FIELD_BINDING_VAR_ASSIGNMENT_LIST node.
 * @return int the number of variable names pushed (the number of bindings minus the number of _ pattern variables)
 */
private static int pushVarNamesFromBindings(final ArrayStack<String> stack, final ParseTreeNode fieldBindingVarAssignmentListNode) {
    fieldBindingVarAssignmentListNode.verifyType(CALTreeParserTokenTypes.FIELD_BINDING_VAR_ASSIGNMENT_LIST);

    int pushedCount = 0;

    for (final ParseTreeNode bindingNode : fieldBindingVarAssignmentListNode) {
        bindingNode.verifyType(CALTreeParserTokenTypes.FIELD_BINDING_VAR_ASSIGNMENT);

        // Each binding is (field name, variable); only the variable contributes a name.
        final ParseTreeNode fieldName = bindingNode.firstChild();
        fieldName.verifyType(CALTreeParserTokenTypes.VAR_ID, CALTreeParserTokenTypes.ORDINAL_FIELD_NAME);

        final ParseTreeNode boundVar = fieldName.nextSibling();
        pushedCount += pushVarName(stack, boundVar) ? 1 : 0;
    }

    return pushedCount;
}
}