OptimizerNode.java example

Explorer
flink-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.optimizer.dag;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.api.common.operators.AbstractUdfOperator;
import org.apache.flink.api.common.operators.CompilerHints;
import org.apache.flink.api.common.operators.Operator;
import org.apache.flink.api.common.operators.SemanticProperties;
import org.apache.flink.api.common.operators.util.FieldSet;
import org.apache.flink.optimizer.CompilerException;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.costs.CostEstimator;
import org.apache.flink.optimizer.dataproperties.InterestingProperties;
import org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties;
import org.apache.flink.optimizer.dataproperties.RequestedLocalProperties;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.optimizer.plandump.DumpableConnection;
import org.apache.flink.optimizer.plandump.DumpableNode;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;
import org.apache.flink.util.Visitable;
import org.apache.flink.util.Visitor;

/**
 * The OptimizerNode is the base class of all nodes in the optimizer DAG. The optimizer DAG is the
 * optimizer's representation of a program, created before the actual optimization (which creates different
 * candidate plans and computes their cost).
 * <p>
 * Nodes in the DAG correspond (almost) one-to-one to the operators in a program. The optimizer DAG is constructed
 * to hold the additional information that the optimizer needs:
 * <ul>
 *     <li>Estimates of the data size processed by each operator</li>
 *     <li>Helper structures to track where the data flow "splits" and "joins", to support flows that are
 *         DAGs but not trees.</li>
 *     <li>Tags and weights to differentiate between loop-variant and -invariant parts of an iteration</li>
 *     <li>Interesting properties to be used during the enumeration of candidate plans</li>
 * </ul>
 */
public abstract class OptimizerNode implements Visitable<OptimizerNode>, EstimateProvider, DumpableNode<OptimizerNode> {
	
	public static final int MAX_DYNAMIC_PATH_COST_WEIGHT = 100;
	
	// --------------------------------------------------------------------------------------------
	//                                      Members
	// --------------------------------------------------------------------------------------------

	private final Operator<?> operator; // The operator (Reduce / Join / DataSource / ...)
	
	private List<String> broadcastConnectionNames = new ArrayList<String>(); // the broadcast inputs names of this node
	
	private List<DagConnection> broadcastConnections = new ArrayList<DagConnection>(); // the broadcast inputs of this node
	
	private List<DagConnection> outgoingConnections; // The links to succeeding nodes

	private InterestingProperties intProps; // the interesting properties of this node
	
	// --------------------------------- Branch Handling ------------------------------------------

	protected List<UnclosedBranchDescriptor> openBranches; // stack of branches in the sub-graph that are not joined
	
	protected Set<OptimizerNode> closedBranchingNodes; 	// stack of branching nodes which have already been closed
	
	protected List<OptimizerNode> hereJoinedBranches;	// the branching nodes (node with multiple outputs)
														// that are partially joined (through multiple inputs or broadcast vars)

	// ---------------------------- Estimates and Annotations -------------------------------------
	
	protected long estimatedOutputSize = -1; // the estimated size of the output (bytes)

	protected long estimatedNumRecords = -1; // the estimated number of key/value pairs in the output
	
	protected Set<FieldSet> uniqueFields; // set of attributes that will always be unique after this node

	// --------------------------------- General Parameters ---------------------------------------
	
	private int parallelism = ExecutionConfig.PARALLELISM_DEFAULT; // the number of parallel instances of this node

	private long minimalMemoryPerSubTask = -1;

	protected int id = -1; 				// the id for this node.
	
	protected int costWeight = 1;		// factor to weight the costs for dynamic paths
	
	protected boolean onDynamicPath;
	
	protected List<PlanNode> cachedPlans;	// cache candidates, because the may be accessed repeatedly

	// ------------------------------------------------------------------------
	//                      Constructor / Setup
	// ------------------------------------------------------------------------

	/**
	 * Creates a new optimizer node that represents the given program operator.
	 * 
	 * @param op The operator that the node represents.
	 */
	public OptimizerNode(Operator<?> op) {
		this.operator = op;
		readStubAnnotations();
	}
	
	protected OptimizerNode(OptimizerNode toCopy) {
		this.operator = toCopy.operator;
		this.intProps = toCopy.intProps;
		
		this.openBranches = toCopy.openBranches;
		this.closedBranchingNodes = toCopy.closedBranchingNodes;
		
		this.estimatedOutputSize = toCopy.estimatedOutputSize;
		this.estimatedNumRecords = toCopy.estimatedNumRecords;
		
		this.parallelism = toCopy.parallelism;
		this.minimalMemoryPerSubTask = toCopy.minimalMemoryPerSubTask;
		
		this.id = toCopy.id;
		this.costWeight = toCopy.costWeight;
		this.onDynamicPath = toCopy.onDynamicPath;
	}

	// ------------------------------------------------------------------------
	//  Methods specific to unary- / binary- / special nodes
	// ------------------------------------------------------------------------

	/**
	 * Gets the name of this node, which is the name of the function/operator, or
	 * data source / data sink.
	 * 
	 * @return The node name.
	 */
	public abstract String getOperatorName();

	/**
	 * This function connects the predecessors to this operator.
	 *
	 * @param operatorToNode The map from program operators to optimizer nodes.
	 * @param defaultExchangeMode The data exchange mode to use, if the operator does not
	 *                            specify one.
	 */
	public abstract void setInput(Map<Operator<?>, OptimizerNode> operatorToNode,
									ExecutionMode defaultExchangeMode);

	/**
	 * This function connects the operators that produce the broadcast inputs to this operator.
	 *
	 * @param operatorToNode The map from program operators to optimizer nodes.
	 * @param defaultExchangeMode The data exchange mode to use, if the operator does not
	 *                            specify one.
	 *
	 * @throws CompilerException
	 */
	public void setBroadcastInputs(Map<Operator<?>, OptimizerNode> operatorToNode, ExecutionMode defaultExchangeMode) {
		// skip for Operators that don't support broadcast variables 
		if (!(getOperator() instanceof AbstractUdfOperator<?, ?>)) {
			return;
		}

		// get all broadcast inputs
		AbstractUdfOperator<?, ?> operator = ((AbstractUdfOperator<?, ?>) getOperator());

		// create connections and add them
		for (Map.Entry<String, Operator<?>> input : operator.getBroadcastInputs().entrySet()) {
			OptimizerNode predecessor = operatorToNode.get(input.getValue());
			DagConnection connection = new DagConnection(predecessor, this,
															ShipStrategyType.BROADCAST, defaultExchangeMode);
			addBroadcastConnection(input.getKey(), connection);
			predecessor.addOutgoingConnection(connection);
		}
	}

	/**
	 * Gets all incoming connections of this node.
	 * This method needs to be overridden by subclasses to return the children.
	 * 
	 * @return The list of incoming connections.
	 */
	public abstract List<DagConnection> getIncomingConnections();

	/**
	 * Tells the node to compute the interesting properties for its inputs. The interesting properties
	 * for the node itself must have been computed before.
	 * The node must then see how many of interesting properties it preserves and add its own.
	 * 
	 * @param estimator The {@code CostEstimator} instance to use for plan cost estimation.
	 */
	public abstract void computeInterestingPropertiesForInputs(CostEstimator estimator);

	/**
	 * This method causes the node to compute the description of open branches in its sub-plan. An open branch
	 * describes, that a (transitive) child node had multiple outputs, which have not all been re-joined in the
	 * sub-plan. This method needs to set the <code>openBranches</code> field to a stack of unclosed branches, the
	 * latest one top. A branch is considered closed, if some later node sees all of the branching node's outputs,
	 * no matter if there have been more branches to different paths in the meantime.
	 */
	public abstract void computeUnclosedBranchStack();
	
	
	protected List<UnclosedBranchDescriptor> computeUnclosedBranchStackForBroadcastInputs(
															List<UnclosedBranchDescriptor> branchesSoFar)
	{
		// handle the data flow branching for the broadcast inputs
		for (DagConnection broadcastInput : getBroadcastConnections()) {
			OptimizerNode bcSource = broadcastInput.getSource();
			addClosedBranches(bcSource.closedBranchingNodes);
			
			List<UnclosedBranchDescriptor> bcBranches = bcSource.getBranchesForParent(broadcastInput);
			
			ArrayList<UnclosedBranchDescriptor> mergedBranches = new ArrayList<UnclosedBranchDescriptor>();
			mergeLists(branchesSoFar, bcBranches, mergedBranches, true);
			branchesSoFar = mergedBranches.isEmpty() ? Collections.<UnclosedBranchDescriptor>emptyList() : mergedBranches;
		}
		
		return branchesSoFar;
	}

	/**
	 * Computes the plan alternatives for this node, an implicitly for all nodes that are children of
	 * this node. This method must determine for each alternative the global and local properties
	 * and the costs. This method may recursively call <code>getAlternatives()</code> on its children
	 * to get their plan alternatives, and build its own alternatives on top of those.
	 * 
	 * @param estimator
	 *        The cost estimator used to estimate the costs of each plan alternative.
	 * @return A list containing all plan alternatives.
	 */
	public abstract List<PlanNode> getAlternativePlans(CostEstimator estimator);

	/**
	 * This method implements the visit of a depth-first graph traversing visitor. Implementers must first
	 * call the <code>preVisit()</code> method, then hand the visitor to their children, and finally call
	 * the <code>postVisit()</code> method.
	 * 
	 * @param visitor
	 *        The graph traversing visitor.
	 * @see org.apache.flink.util.Visitable#accept(org.apache.flink.util.Visitor)
	 */
	@Override
	public abstract void accept(Visitor<OptimizerNode> visitor);

	public abstract SemanticProperties getSemanticProperties();

	// ------------------------------------------------------------------------
	//                          Getters / Setters
	// ------------------------------------------------------------------------

	@Override
	public Iterable<OptimizerNode> getPredecessors() {
		List<OptimizerNode> allPredecessors = new ArrayList<OptimizerNode>();

		for (DagConnection dagConnection : getIncomingConnections()) {
			allPredecessors.add(dagConnection.getSource());
		}
		
		for (DagConnection conn : getBroadcastConnections()) {
			allPredecessors.add(conn.getSource());
		}
		
		return allPredecessors;
	}
	
	/**
	 * Gets the ID of this node. If the id has not yet been set, this method returns -1;
	 * 
	 * @return This node's id, or -1, if not yet set.
	 */
	public int getId() {
		return this.id;
	}

	/**
	 * Sets the ID of this node.
	 * 
	 * @param id
	 *        The id for this node.
	 */
	public void initId(int id) {
		if (id <= 0) {
			throw new IllegalArgumentException();
		}
		
		if (this.id == -1) {
			this.id = id;
		} else {
			throw new IllegalStateException("Id has already been initialized.");
		}
	}

	/**
	 * Adds the broadcast connection identified by the given {@code name} to this node.
	 * 
	 * @param broadcastConnection The connection to add.
	 */
	public void addBroadcastConnection(String name, DagConnection broadcastConnection) {
		this.broadcastConnectionNames.add(name);
		this.broadcastConnections.add(broadcastConnection);
	}

	/**
	 * Return the list of names associated with broadcast inputs for this node.
	 */
	public List<String> getBroadcastConnectionNames() {
		return this.broadcastConnectionNames;
	}

	/**
	 * Return the list of inputs associated with broadcast variables for this node.
	 */
	public List<DagConnection> getBroadcastConnections() {
		return this.broadcastConnections;
	}

	/**
	 * Adds a new outgoing connection to this node.
	 * 
	 * @param connection
	 *        The connection to add.
	 */
	public void addOutgoingConnection(DagConnection connection) {
		if (this.outgoingConnections == null) {
			this.outgoingConnections = new ArrayList<DagConnection>();
		} else {
			if (this.outgoingConnections.size() == 64) {
				throw new CompilerException("Cannot currently handle nodes with more than 64 outputs.");
			}
		}

		this.outgoingConnections.add(connection);
	}

	/**
	 * The list of outgoing connections from this node to succeeding tasks.
	 * 
	 * @return The list of outgoing connections.
	 */
	public List<DagConnection> getOutgoingConnections() {
		return this.outgoingConnections;
	}

	/**
	 * Gets the operator represented by this optimizer node.
	 * 
	 * @return This node's operator.
	 */
	public Operator<?> getOperator() {
		return this.operator;
	}

	/**
	 * Gets the parallelism for the operator represented by this optimizer node.
	 * The parallelism denotes how many parallel instances of the operator on will be
	 * spawned during the execution. If this value is {@link ExecutionConfig#PARALLELISM_DEFAULT}
	 * then the system will take the default number of parallel instances.
	 * 
	 * @return The parallelism of the operator.
	 */
	public int getParallelism() {
		return this.parallelism;
	}

	/**
	 * Sets the parallelism for this optimizer node.
	 * The parallelism denotes how many parallel instances of the operator will be
	 * spawned during the execution.
	 * 
	 * @param parallelism The parallelism to set. If this value is {@link ExecutionConfig#PARALLELISM_DEFAULT}
	 *        then the system will take the default number of parallel instances.
	 * @throws IllegalArgumentException If the parallelism is smaller than one.
	 */
	public void setParallelism(int parallelism) {
		if (parallelism < 1 && parallelism != ExecutionConfig.PARALLELISM_DEFAULT) {
			throw new IllegalArgumentException("Parallelism of " + parallelism + " is invalid.");
		}
		this.parallelism = parallelism;
	}
	
	/**
	 * Gets the amount of memory that all subtasks of this task have jointly available.
	 * 
	 * @return The total amount of memory across all subtasks.
	 */
	public long getMinimalMemoryAcrossAllSubTasks() {
		return this.minimalMemoryPerSubTask == -1 ? -1 : this.minimalMemoryPerSubTask * this.parallelism;
	}
	
	public boolean isOnDynamicPath() {
		return this.onDynamicPath;
	}
	
	public void identifyDynamicPath(int costWeight) {
		boolean anyDynamic = false;
		boolean allDynamic = true;
		
		for (DagConnection conn : getIncomingConnections()) {
			boolean dynamicIn = conn.isOnDynamicPath();
			anyDynamic |= dynamicIn;
			allDynamic &= dynamicIn;
		}
		
		for (DagConnection conn : getBroadcastConnections()) {
			boolean dynamicIn = conn.isOnDynamicPath();
			anyDynamic |= dynamicIn;
			allDynamic &= dynamicIn;
		}
		
		if (anyDynamic) {
			this.onDynamicPath = true;
			this.costWeight = costWeight;
			if (!allDynamic) {
				// this node joins static and dynamic path.
				// mark the connections where the source is not dynamic as cached
				for (DagConnection conn : getIncomingConnections()) {
					if (!conn.getSource().isOnDynamicPath()) {
						conn.setMaterializationMode(conn.getMaterializationMode().makeCached());
					}
				}
				
				// broadcast variables are always cached, because they stay unchanged available in the
				// runtime context of the functions
			}
		}
	}
	
	public int getCostWeight() {
		return this.costWeight;
	}
	
	public int getMaxDepth() {
		int maxDepth = 0;
		for (DagConnection conn : getIncomingConnections()) {
			maxDepth = Math.max(maxDepth, conn.getMaxDepth());
		}
		for (DagConnection conn : getBroadcastConnections()) {
			maxDepth = Math.max(maxDepth, conn.getMaxDepth());
		}
		
		return maxDepth;
	}

	/**
	 * Gets the properties that are interesting for this node to produce.
	 * 
	 * @return The interesting properties for this node, or null, if not yet computed.
	 */
	public InterestingProperties getInterestingProperties() {
		return this.intProps;
	}

	@Override
	public long getEstimatedOutputSize() {
		return this.estimatedOutputSize;
	}

	@Override
	public long getEstimatedNumRecords() {
		return this.estimatedNumRecords;
	}
	
	public void setEstimatedOutputSize(long estimatedOutputSize) {
		this.estimatedOutputSize = estimatedOutputSize;
	}

	public void setEstimatedNumRecords(long estimatedNumRecords) {
		this.estimatedNumRecords = estimatedNumRecords;
	}
	
	@Override
	public float getEstimatedAvgWidthPerOutputRecord() {
		if (this.estimatedOutputSize > 0 && this.estimatedNumRecords > 0) {
			return ((float) this.estimatedOutputSize) / this.estimatedNumRecords;
		} else {
			return -1.0f;
		}
	}

	/**
	 * Checks whether this node has branching output. A node's output is branched, if it has more
	 * than one output connection.
	 * 
	 * @return True, if the node's output branches. False otherwise.
	 */
	public boolean isBranching() {
		return getOutgoingConnections() != null && getOutgoingConnections().size() > 1;
	}

	public void markAllOutgoingConnectionsAsPipelineBreaking() {
		if (this.outgoingConnections == null) {
			throw new IllegalStateException("The outgoing connections have not yet been initialized.");
		}
		for (DagConnection conn : getOutgoingConnections()) {
			conn.markBreaksPipeline();
		}
	}

	// ------------------------------------------------------------------------
	//                              Miscellaneous
	// ------------------------------------------------------------------------

	/**
	 * Checks, if all outgoing connections have their interesting properties set from their target nodes.
	 * 
	 * @return True, if on all outgoing connections, the interesting properties are set. False otherwise.
	 */
	public boolean haveAllOutputConnectionInterestingProperties() {
		for (DagConnection conn : getOutgoingConnections()) {
			if (conn.getInterestingProperties() == null) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Computes all the interesting properties that are relevant to this node. The interesting
	 * properties are a union of the interesting properties on each outgoing connection.
	 * However, if two interesting properties on the outgoing connections overlap,
	 * the interesting properties will occur only once in this set. For that, this
	 * method deduplicates and merges the interesting properties.
	 * This method returns copies of the original interesting properties objects and
	 * leaves the original objects, contained by the connections, unchanged.
	 */
	public void computeUnionOfInterestingPropertiesFromSuccessors() {
		List<DagConnection> conns = getOutgoingConnections();
		if (conns.size() == 0) {
			// no incoming, we have none ourselves
			this.intProps = new InterestingProperties();
		} else {
			this.intProps = conns.get(0).getInterestingProperties().clone();
			for (int i = 1; i < conns.size(); i++) {
				this.intProps.addInterestingProperties(conns.get(i).getInterestingProperties());
			}
		}
		this.intProps.dropTrivials();
	}
	
	public void clearInterestingProperties() {
		this.intProps = null;
		for (DagConnection conn : getIncomingConnections()) {
			conn.clearInterestingProperties();
		}
		for (DagConnection conn : getBroadcastConnections()) {
			conn.clearInterestingProperties();
		}
	}
	
	/**
	 * Causes this node to compute its output estimates (such as number of rows, size in bytes)
	 * based on the inputs and the compiler hints. The compiler hints are instantiated with conservative
	 * default values which are used if no other values are provided. Nodes may access the statistics to
	 * determine relevant information.
	 * 
	 * @param statistics
	 *        The statistics object which may be accessed to get statistical information.
	 *        The parameter may be null, if no statistics are available.
	 */
	public void computeOutputEstimates(DataStatistics statistics) {
		// sanity checking
		for (DagConnection c : getIncomingConnections()) {
			if (c.getSource() == null) {
				throw new CompilerException("Bug: Estimate computation called before inputs have been set.");
			}
		}
		
		// let every operator do its computation
		computeOperatorSpecificDefaultEstimates(statistics);
		
		if (this.estimatedOutputSize < 0) {
			this.estimatedOutputSize = -1;
		}
		if (this.estimatedNumRecords < 0) {
			this.estimatedNumRecords = -1;
		}
		
		// overwrite default estimates with hints, if given
		if (getOperator() == null || getOperator().getCompilerHints() == null) {
			return ;
		}
		
		CompilerHints hints = getOperator().getCompilerHints();
		if (hints.getOutputSize() >= 0) {
			this.estimatedOutputSize = hints.getOutputSize();
		}
		
		if (hints.getOutputCardinality() >= 0) {
			this.estimatedNumRecords = hints.getOutputCardinality();
		}
		
		if (hints.getFilterFactor() >= 0.0f) {
			if (this.estimatedNumRecords >= 0) {
				this.estimatedNumRecords = (long) (this.estimatedNumRecords * hints.getFilterFactor());
				
				if (this.estimatedOutputSize >= 0) {
					this.estimatedOutputSize = (long) (this.estimatedOutputSize * hints.getFilterFactor());
				}
			}
			else if (this instanceof SingleInputNode) {
				OptimizerNode pred = ((SingleInputNode) this).getPredecessorNode();
				if (pred != null && pred.getEstimatedNumRecords() >= 0) {
					this.estimatedNumRecords = (long) (pred.getEstimatedNumRecords() * hints.getFilterFactor());
				}
			}
		}
		
		// use the width to infer the cardinality (given size) and vice versa
		if (hints.getAvgOutputRecordSize() >= 1) {
			// the estimated number of rows based on size
			if (this.estimatedNumRecords == -1 && this.estimatedOutputSize >= 0) {
				this.estimatedNumRecords = (long) (this.estimatedOutputSize / hints.getAvgOutputRecordSize());
			}
			else if (this.estimatedOutputSize == -1 && this.estimatedNumRecords >= 0) {
				this.estimatedOutputSize = (long) (this.estimatedNumRecords * hints.getAvgOutputRecordSize());
			}
		}
	}
	
	protected abstract void computeOperatorSpecificDefaultEstimates(DataStatistics statistics);
	
	// ------------------------------------------------------------------------
	// Reading of stub annotations
	// ------------------------------------------------------------------------
	
	/**
	 * Reads all stub annotations, i.e. which fields remain constant, what cardinality bounds the
	 * functions have, which fields remain unique.
	 */
	protected void readStubAnnotations() {
		readUniqueFieldsAnnotation();
	}
	
	protected void readUniqueFieldsAnnotation() {
		if (this.operator.getCompilerHints() != null) {
			Set<FieldSet> uniqueFieldSets = operator.getCompilerHints().getUniqueFields();
			if (uniqueFieldSets != null) {
				if (this.uniqueFields == null) {
					this.uniqueFields = new HashSet<FieldSet>();
				}
				this.uniqueFields.addAll(uniqueFieldSets);
			}
		}
	}
	
	// ------------------------------------------------------------------------
	// Access of stub annotations
	// ------------------------------------------------------------------------
	
	/**
	 * Gets the FieldSets which are unique in the output of the node. 
	 */
	public Set<FieldSet> getUniqueFields() {
		return this.uniqueFields == null ? Collections.<FieldSet>emptySet() : this.uniqueFields;
	}
	
	// --------------------------------------------------------------------------------------------
	//                                    Pruning
	// --------------------------------------------------------------------------------------------
	
	protected void prunePlanAlternatives(List<PlanNode> plans) {
		if (plans.isEmpty()) {
			throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Too restrictive plan hints.");
		}
		// shortcut for the simple case
		if (plans.size() == 1) {
			return;
		}
		
		// we can only compare plan candidates that made equal choices
		// at the branching points. for each choice at a branching point,
		// we need to keep the cheapest (wrt. interesting properties).
		// if we do not keep candidates for each branch choice, we might not
		// find branch compatible candidates when joining the branches back.
		
		// for pruning, we are quasi AFTER the node, so in the presence of
		// branches, we need form the per-branch-choice groups by the choice
		// they made at the latest un-joined branching node. Note that this is
		// different from the check for branch compatibility of candidates, as
		// this happens on the input sub-plans and hence BEFORE the node (therefore
		// it is relevant to find the latest (partially) joined branch point.
		
		if (this.openBranches == null || this.openBranches.isEmpty()) {
			prunePlanAlternativesWithCommonBranching(plans);
		} else {
			// partition the candidates into groups that made the same sub-plan candidate
			// choice at the latest unclosed branch point
			
			final OptimizerNode[] branchDeterminers = new OptimizerNode[this.openBranches.size()];
			
			for (int i = 0; i < branchDeterminers.length; i++) {
				branchDeterminers[i] = this.openBranches.get(this.openBranches.size() - 1 - i).getBranchingNode();
			}
			
			// this sorter sorts by the candidate choice at the branch point
			Comparator<PlanNode> sorter = new Comparator<PlanNode>() {
				
				@Override
				public int compare(PlanNode o1, PlanNode o2) {
					for (OptimizerNode branchDeterminer : branchDeterminers) {
						PlanNode n1 = o1.getCandidateAtBranchPoint(branchDeterminer);
						PlanNode n2 = o2.getCandidateAtBranchPoint(branchDeterminer);
						int hash1 = System.identityHashCode(n1);
						int hash2 = System.identityHashCode(n2);

						if (hash1 != hash2) {
							return hash1 - hash2;
						}
					}
					return 0;
				}
			};
			Collections.sort(plans, sorter);
			
			List<PlanNode> result = new ArrayList<PlanNode>();
			List<PlanNode> turn = new ArrayList<PlanNode>();
			
			final PlanNode[] determinerChoice = new PlanNode[branchDeterminers.length];

			while (!plans.isEmpty()) {
				// take one as the determiner
				turn.clear();
				PlanNode determiner = plans.remove(plans.size() - 1);
				turn.add(determiner);
				
				for (int i = 0; i < determinerChoice.length; i++) {
					determinerChoice[i] = determiner.getCandidateAtBranchPoint(branchDeterminers[i]);
				}

				// go backwards through the plans and find all that are equal
				boolean stillEqual = true;
				for (int k = plans.size() - 1; k >= 0 && stillEqual; k--) {
					PlanNode toCheck = plans.get(k);
					
					for (int i = 0; i < branchDeterminers.length; i++) {
						PlanNode checkerChoice = toCheck.getCandidateAtBranchPoint(branchDeterminers[i]);
					
						if (checkerChoice != determinerChoice[i]) {
							// not the same anymore
							stillEqual = false;
							break;
						}
					}
					
					if (stillEqual) {
						// the same
						plans.remove(k);
						turn.add(toCheck);
					}
				}

				// now that we have only plans with the same branch alternatives, prune!
				if (turn.size() > 1) {
					prunePlanAlternativesWithCommonBranching(turn);
				}
				result.addAll(turn);
			}

			// after all turns are complete
			plans.clear();
			plans.addAll(result);
		}
	}
	
	protected void prunePlanAlternativesWithCommonBranching(List<PlanNode> plans) {
		// for each interesting property, which plans are cheapest
		final RequestedGlobalProperties[] gps = this.intProps.getGlobalProperties().toArray(
							new RequestedGlobalProperties[this.intProps.getGlobalProperties().size()]);
		final RequestedLocalProperties[] lps = this.intProps.getLocalProperties().toArray(
							new RequestedLocalProperties[this.intProps.getLocalProperties().size()]);
		
		final PlanNode[][] toKeep = new PlanNode[gps.length][];
		final PlanNode[] cheapestForGlobal = new PlanNode[gps.length];
		
		
		PlanNode cheapest = null; // the overall cheapest plan

		// go over all plans from the list
		for (PlanNode candidate : plans) {
			// check if that plan is the overall cheapest
			if (cheapest == null || (cheapest.getCumulativeCosts().compareTo(candidate.getCumulativeCosts()) > 0)) {
				cheapest = candidate;
			}

			// find the interesting global properties that this plan matches
			for (int i = 0; i < gps.length; i++) {
				if (gps[i].isMetBy(candidate.getGlobalProperties())) {
					// the candidate meets the global property requirements. That means
					// it has a chance that its local properties are re-used (they would be
					// destroyed if global properties need to be established)
					
					if (cheapestForGlobal[i] == null || (cheapestForGlobal[i].getCumulativeCosts().compareTo(candidate.getCumulativeCosts()) > 0)) {
						cheapestForGlobal[i] = candidate;
					}
					
					final PlanNode[] localMatches;
					if (toKeep[i] == null) {
						localMatches = new PlanNode[lps.length];
						toKeep[i] = localMatches;
					} else {
						localMatches = toKeep[i];
					}
					
					for (int k = 0; k < lps.length; k++) {
						if (lps[k].isMetBy(candidate.getLocalProperties())) {
							final PlanNode previous = localMatches[k];
							if (previous == null || previous.getCumulativeCosts().compareTo(candidate.getCumulativeCosts()) > 0) {
								// this one is cheaper!
								localMatches[k] = candidate;
							}
						}
					}
				}
			}
		}

		// all plans are set now
		plans.clear();

		// add the cheapest plan
		if (cheapest != null) {
			plans.add(cheapest);
			cheapest.setPruningMarker(); // remember that that plan is in the set
		}

		// add all others, which are optimal for some interesting properties
		for (int i = 0; i < gps.length; i++) {
			if (toKeep[i] != null) {
				final PlanNode[] localMatches = toKeep[i];
				for (final PlanNode n : localMatches) {
					if (n != null && !n.isPruneMarkerSet()) {
						n.setPruningMarker();
						plans.add(n);
					}
				}
			}
			if (cheapestForGlobal[i] != null) {
				final PlanNode n = cheapestForGlobal[i];
				if (!n.isPruneMarkerSet()) {
					n.setPruningMarker();
					plans.add(n);
				}
			}
		}
	}
	
	
	// --------------------------------------------------------------------------------------------
	//                       Handling of branches
	// --------------------------------------------------------------------------------------------

	public boolean hasUnclosedBranches() {
		return this.openBranches != null && !this.openBranches.isEmpty();
	}

	public Set<OptimizerNode> getClosedBranchingNodes() {
		return this.closedBranchingNodes;
	}
	
	public List<UnclosedBranchDescriptor> getOpenBranches() {
		return this.openBranches;
	}


	protected List<UnclosedBranchDescriptor> getBranchesForParent(DagConnection toParent) {
		if (this.outgoingConnections.size() == 1) {
			// return our own stack of open branches, because nothing is added
			if (this.openBranches == null || this.openBranches.isEmpty()) {
				return Collections.emptyList();
			} else {
				return new ArrayList<UnclosedBranchDescriptor>(this.openBranches);
			}
		}
		else if (this.outgoingConnections.size() > 1) {
			// we branch add a branch info to the stack
			List<UnclosedBranchDescriptor> branches = new ArrayList<UnclosedBranchDescriptor>(4);
			if (this.openBranches != null) {
				branches.addAll(this.openBranches);
			}

			// find out, which output number the connection to the parent
			int num;
			for (num = 0; num < this.outgoingConnections.size(); num++) {
				if (this.outgoingConnections.get(num) == toParent) {
					break;
				}
			}
			if (num >= this.outgoingConnections.size()) {
				throw new CompilerException("Error in compiler: "
					+ "Parent to get branch info for is not contained in the outgoing connections.");
			}

			// create the description and add it
			long bitvector = 0x1L << num;
			branches.add(new UnclosedBranchDescriptor(this, bitvector));
			return branches;
		}
		else {
			throw new CompilerException(
				"Error in compiler: Cannot get branch info for successor in a node with no successors.");
		}
	}

	
	protected void removeClosedBranches(List<UnclosedBranchDescriptor> openList) {
		if (openList == null || openList.isEmpty() || this.closedBranchingNodes == null || this.closedBranchingNodes.isEmpty()) {
			return;
		}
		
		Iterator<UnclosedBranchDescriptor> it = openList.iterator();
		while (it.hasNext()) {
			if (this.closedBranchingNodes.contains(it.next().getBranchingNode())) {
				//this branch was already closed --> remove it from the list
				it.remove();
			}
		}
	}
	
	protected void addClosedBranches(Set<OptimizerNode> alreadyClosed) {
		if (alreadyClosed == null || alreadyClosed.isEmpty()) {
			return;
		}
		
		if (this.closedBranchingNodes == null) { 
			this.closedBranchingNodes = new HashSet<OptimizerNode>(alreadyClosed);
		} else {
			this.closedBranchingNodes.addAll(alreadyClosed);
		}
	}
	
	protected void addClosedBranch(OptimizerNode alreadyClosed) {
		if (this.closedBranchingNodes == null) { 
			this.closedBranchingNodes = new HashSet<OptimizerNode>();
		}

		this.closedBranchingNodes.add(alreadyClosed);
	}
	
	/**
	 * Checks whether to candidate plans for the sub-plan of this node are comparable. The two
	 * alternative plans are comparable, if
	 * 
	 * a) There is no branch in the sub-plan of this node
	 * b) Both candidates have the same candidate as the child at the last open branch. 
	 * 
	 * @param plan1 The root node of the first candidate plan.
	 * @param plan2 The root node of the second candidate plan.
	 * @return True if the nodes are branch compatible in the inputs.
	 */
	protected boolean areBranchCompatible(PlanNode plan1, PlanNode plan2) {
		if (plan1 == null || plan2 == null) {
			throw new NullPointerException();
		}
		
		// if there is no open branch, the children are always compatible.
		// in most plans, that will be the dominant case
		if (this.hereJoinedBranches == null || this.hereJoinedBranches.isEmpty()) {
			return true;
		}

		for (OptimizerNode joinedBrancher : hereJoinedBranches) {
			final PlanNode branch1Cand = plan1.getCandidateAtBranchPoint(joinedBrancher);
			final PlanNode branch2Cand = plan2.getCandidateAtBranchPoint(joinedBrancher);
			
			if (branch1Cand != null && branch2Cand != null && branch1Cand != branch2Cand) {
				return false;
			}
		}
		return true;
	}
	
	/**
	 * The node IDs are assigned in graph-traversal order (pre-order), hence, each list is
	 * sorted by ID in ascending order and all consecutive lists start with IDs in ascending order.
	 *
	 * @param markJoinedBranchesAsPipelineBreaking True, if the
	 */
	protected final boolean mergeLists(List<UnclosedBranchDescriptor> child1open,
										List<UnclosedBranchDescriptor> child2open,
										List<UnclosedBranchDescriptor> result,
										boolean markJoinedBranchesAsPipelineBreaking) {

		//remove branches which have already been closed
		removeClosedBranches(child1open);
		removeClosedBranches(child2open);
		
		result.clear();
		
		// check how many open branches we have. the cases:
		// 1) if both are null or empty, the result is null
		// 2) if one side is null (or empty), the result is the other side.
		// 3) both are set, then we need to merge.
		if (child1open == null || child1open.isEmpty()) {
			if(child2open != null && !child2open.isEmpty()) {
				result.addAll(child2open);
			}
			return false;
		}
		
		if (child2open == null || child2open.isEmpty()) {
			result.addAll(child1open);
			return false;
		}

		int index1 = child1open.size() - 1;
		int index2 = child2open.size() - 1;
		
		boolean didCloseABranch = false;

		// as both lists (child1open and child2open) are sorted in ascending ID order
		// we can do a merge-join-like loop which preserved the order in the result list
		// and eliminates duplicates
		while (index1 >= 0 || index2 >= 0) {
			int id1 = -1;
			int id2 = index2 >= 0 ? child2open.get(index2).getBranchingNode().getId() : -1;

			while (index1 >= 0 && (id1 = child1open.get(index1).getBranchingNode().getId()) > id2) {
				result.add(child1open.get(index1));
				index1--;
			}
			while (index2 >= 0 && (id2 = child2open.get(index2).getBranchingNode().getId()) > id1) {
				result.add(child2open.get(index2));
				index2--;
			}

			// match: they share a common branching child
			if (id1 == id2) {
				didCloseABranch = true;
				
				// if this is the latest common child, remember it
				OptimizerNode currBanchingNode = child1open.get(index1).getBranchingNode();
				
				long vector1 = child1open.get(index1).getJoinedPathsVector();
				long vector2 = child2open.get(index2).getJoinedPathsVector();
				
				// check if this is the same descriptor, (meaning that it contains the same paths)
				// if it is the same, add it only once, otherwise process the join of the paths
				if (vector1 == vector2) {
					result.add(child1open.get(index1));
				}
				else {
					// we merge (re-join) a branch

					// mark the branch as a point where we break the pipeline
					if (markJoinedBranchesAsPipelineBreaking) {
						currBanchingNode.markAllOutgoingConnectionsAsPipelineBreaking();
					}

					if (this.hereJoinedBranches == null) {
						this.hereJoinedBranches = new ArrayList<OptimizerNode>(2);
					}
					this.hereJoinedBranches.add(currBanchingNode);

					// see, if this node closes the branch
					long joinedInputs = vector1 | vector2;

					// this is 2^size - 1, which is all bits set at positions 0..size-1
					long allInputs = (0x1L << currBanchingNode.getOutgoingConnections().size()) - 1;

					if (joinedInputs == allInputs) {
						// closed - we can remove it from the stack
						addClosedBranch(currBanchingNode);
					} else {
						// not quite closed
						result.add(new UnclosedBranchDescriptor(currBanchingNode, joinedInputs));
					}
				}

				index1--;
				index2--;
			}
		}

		// merged. now we need to reverse the list, because we added the elements in reverse order
		Collections.reverse(result);
		return didCloseABranch;
	}

	@Override
	public OptimizerNode getOptimizerNode() {
		return this;
	}
	
	@Override
	public PlanNode getPlanNode() {
		return null;
	}
	
	@Override
	public Iterable<DumpableConnection<OptimizerNode>> getDumpableInputs() {
		List<DumpableConnection<OptimizerNode>> allInputs = new ArrayList<DumpableConnection<OptimizerNode>>();
		
		allInputs.addAll(getIncomingConnections());
		allInputs.addAll(getBroadcastConnections());
		
		return allInputs;
	}
	
	@Override
	public String toString() {
		StringBuilder bld = new StringBuilder();

		bld.append(getOperatorName());
		bld.append(" (").append(getOperator().getName()).append(") ");

		int i = 1; 
		for (DagConnection conn : getIncomingConnections()) {
			String shipStrategyName = conn.getShipStrategy() == null ? "null" : conn.getShipStrategy().name();
			bld.append('(').append(i++).append(":").append(shipStrategyName).append(')');
		}

		return bld.toString();
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Description of an unclosed branch. An unclosed branch is when the data flow branched (one operator's
	 * result is consumed by multiple targets), but these different branches (targets) have not been joined
	 * together.
	 */
	public static final class UnclosedBranchDescriptor {

		protected OptimizerNode branchingNode;

		protected long joinedPathsVector;

		/**
		 * Creates a new branching descriptor.
		 *
		 * @param branchingNode The node where the branch occurred (teh node with multiple outputs).
		 * @param joinedPathsVector A bit vector describing which branches are tracked by this descriptor.
		 *                          The bit vector is one, where the branch is tracked, zero otherwise.
		 */
		protected UnclosedBranchDescriptor(OptimizerNode branchingNode, long joinedPathsVector) {
			this.branchingNode = branchingNode;
			this.joinedPathsVector = joinedPathsVector;
		}

		public OptimizerNode getBranchingNode() {
			return this.branchingNode;
		}

		public long getJoinedPathsVector() {
			return this.joinedPathsVector;
		}

		@Override
		public String toString() {
			return "(" + this.branchingNode.getOperator() + ") [" + this.joinedPathsVector + "]";
		}
	}
}