PlanNode.java example

Explorer
stratosphere-master
/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.compiler.plan;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.util.FieldSet;
import eu.stratosphere.compiler.CompilerException;
import eu.stratosphere.compiler.costs.Costs;
import eu.stratosphere.compiler.dag.OptimizerNode;
import eu.stratosphere.compiler.dag.OptimizerNode.UnclosedBranchDescriptor;
import eu.stratosphere.compiler.dataproperties.GlobalProperties;
import eu.stratosphere.compiler.dataproperties.LocalProperties;
import eu.stratosphere.compiler.plandump.DumpableConnection;
import eu.stratosphere.compiler.plandump.DumpableNode;
import eu.stratosphere.pact.runtime.task.DriverStrategy;
import eu.stratosphere.util.Visitable;

/**
 * The representation of a data exchange between to operators. The data exchange can realize a shipping strategy, 
 * which established global properties, and a local strategy, which establishes local properties.
 * <p>
 * Because we currently deal only with plans where the operator order is fixed, many properties are equal
 * among candidates and are determined prior to the enumeration (such as for example constant/dynamic path membership).
 * Hence, many methods will delegate to the {@code OptimizerNode} that represents the node this candidate was
 * created for.
 */
public abstract class PlanNode implements Visitable<PlanNode>, DumpableNode<PlanNode> {
	
	protected final OptimizerNode template;
	
	protected final List<Channel> outChannels;
	
	private List<NamedChannel> broadcastInputs;
	
	private final String nodeName; 
	
	private DriverStrategy driverStrategy;	// The local strategy (sorting / hashing, ...)
	
	protected LocalProperties localProps; 			// local properties of the data produced by this node

	protected GlobalProperties globalProps;			// global properties of the data produced by this node
	
	protected Map<OptimizerNode, PlanNode> branchPlan; // the actual plan alternative chosen at a branch point
	
	protected Costs nodeCosts;						// the costs incurred by this node

	protected Costs cumulativeCosts;					// the cumulative costs of all operators in the sub-tree
	
	private long memoryPerSubTask;					// the amount of memory dedicated to each task, in bytes
	
	private int degreeOfParallelism;
	
	private int subtasksPerInstance;
	
	private boolean pFlag;							// flag for the internal pruning algorithm
	
	// --------------------------------------------------------------------------------------------
	
	public PlanNode(OptimizerNode template, String nodeName, DriverStrategy strategy) {
		this.outChannels = new ArrayList<Channel>(2);
		this.broadcastInputs = new ArrayList<NamedChannel>();
		this.template = template;
		this.nodeName = nodeName;
		this.driverStrategy = strategy;
		
		this.degreeOfParallelism = template.getDegreeOfParallelism();
		this.subtasksPerInstance = template.getSubtasksPerInstance();
		
		// check, if there is branch at this node. if yes, this candidate must be associated with
		// the branching template node.
		if (template.isBranching()) {
			this.branchPlan = new HashMap<OptimizerNode, PlanNode>(6);
			this.branchPlan.put(template, this);
		}
	}
	
	protected void mergeBranchPlanMaps(PlanNode pred1, PlanNode pred2) {
		mergeBranchPlanMaps(pred1.branchPlan, pred2.branchPlan);
	}
	
	protected void mergeBranchPlanMaps(Map<OptimizerNode, PlanNode> branchPlan1, Map<OptimizerNode, PlanNode> branchPlan2) {
		// merge the branchPlan maps according the the template's uncloseBranchesStack
		if (this.template.hasUnclosedBranches()) {
			if (this.branchPlan == null) {
				this.branchPlan = new HashMap<OptimizerNode, PlanNode>(8);
			}
	
			for (UnclosedBranchDescriptor uc : this.template.getOpenBranches()) {
				OptimizerNode brancher = uc.getBranchingNode();
				PlanNode selectedCandidate = null;
	
				if (branchPlan1 != null) {
					// predecessor 1 has branching children, see if it got the branch we are looking for
					selectedCandidate = branchPlan1.get(brancher);
				}
				
				if (selectedCandidate == null && branchPlan2 != null) {
					// predecessor 2 has branching children, see if it got the branch we are looking for
					selectedCandidate = branchPlan2.get(brancher);
				}
				
				// it may be that the branch candidate is only found once the broadcast variables are set
				if (selectedCandidate != null) {
					this.branchPlan.put(brancher, selectedCandidate);
				}
			}
		}
	}
	
	// --------------------------------------------------------------------------------------------
	//                                           Accessors
	// --------------------------------------------------------------------------------------------
	
	/**
	 * Gets the optimizer's pact node for which this plan candidate node was created.
	 * 
	 * @return The template optimizer's node.
	 */
	public OptimizerNode getOriginalOptimizerNode() {
		return this.template;
	}
	
	/**
	 * Gets the pact contract this node represents in the plan.
	 * 
	 * @return The pact contract this node represents in the plan.
	 */
	public Operator<?> getPactContract() {
		return this.template.getPactContract();
	}
	
	/**
	 * Gets the name of the plan node.
	 * 
	 * @return The name of the plan node.
	 */
	public String getNodeName() {
		return this.nodeName;
	}
	
	public int getMemoryConsumerWeight() {
		return this.driverStrategy.isMaterializing() ? 1 : 0;
	}
	
	/**
	 * Gets the memory dedicated to each sub-task for this node.
	 * 
	 * @return The memory per task, in bytes.
	 */
	public long getMemoryPerSubTask() {
		return this.memoryPerSubTask;
	}

	/**
	 * Sets the memory dedicated to each task for this node.
	 * 
	 * @param memoryPerTask The memory per sub-task, in bytes.
	 */
	public void setMemoryPerSubTask(long memoryPerTask) {
		this.memoryPerSubTask = memoryPerTask;
	}
	
	/**
	 * Gets the driver strategy from this node. This determines for example for a <i>match</i> Pact whether
	 * to use a merge or a hybrid hash strategy.
	 * 
	 * @return The driver strategy.
	 */
	public DriverStrategy getDriverStrategy() {
		return this.driverStrategy;
	}
	
	/**
	 * Sets the driver strategy for this node. Usually should not be changed.
	 * 
	 * @return The driver strategy.
	 */
	public void setDriverStrategy(DriverStrategy newDriverStrategy) {
		this.driverStrategy = newDriverStrategy;
	}
	
	public void initProperties(GlobalProperties globals, LocalProperties locals) {
		if (this.globalProps != null || this.localProps != null) {
			throw new IllegalStateException();
		}
		this.globalProps = globals;
		this.localProps = locals;
	}
	
	/**
	 * Gets the local properties from this PlanNode.
	 *
	 * @return The local properties.
	 */
	public LocalProperties getLocalProperties() {
		return this.localProps;
	}
	
	/**
	 * Gets the global properties from this PlanNode.
	 *
	 * @return The global properties.
	 */
	public GlobalProperties getGlobalProperties() {
		return this.globalProps;
	}
	
	/**
	 * Gets the costs incurred by this node. The costs reflect also the costs incurred by the shipping strategies
	 * of the incoming connections.
	 * 
	 * @return The node-costs, or null, if not yet set.
	 */
	public Costs getNodeCosts() {
		return this.nodeCosts;
	}

	/**
	 * Gets the cumulative costs of this nose. The cumulative costs are the the sum of the costs
	 * of this node and of all nodes in the subtree below this node.
	 * 
	 * @return The cumulative costs, or null, if not yet set.
	 */
	public Costs getCumulativeCosts() {
		return this.cumulativeCosts;
	}

	public Costs getCumulativeCostsShare() {
		if (this.cumulativeCosts == null){
			return null;
		} else {
			Costs result = cumulativeCosts.clone();
			if (this.template != null && this.template.getOutgoingConnections() != null) {
				int outDegree = this.template.getOutgoingConnections().size();
				if (outDegree > 0) {
					result.divideBy(outDegree);
				}
			}

			return result;
		}
	}

	
	/**
	 * Sets the basic cost for this node to the given value, and sets the cumulative costs
	 * to those costs plus the cost shares of all inputs (regular and broadcast).
	 * 
	 * @param nodeCosts	 The already knows costs for this node
	 * 						(this cost a produces by a concrete {@code OptimizerNode} subclass.
	 */
	public void setCosts(Costs nodeCosts) {
		// set the node costs
		this.nodeCosts = nodeCosts;
		
		// the cumulative costs are the node costs plus the costs of all inputs
		this.cumulativeCosts = nodeCosts.clone();
		
		// add all the normal inputs
		for (Iterator<PlanNode> preds = getPredecessors(); preds.hasNext();) {
			Costs parentCosts = preds.next().getCumulativeCostsShare();
			if (parentCosts != null) {
				this.cumulativeCosts.addCosts(parentCosts);
			} else {
				throw new CompilerException("Trying to set the costs of an operator before the predecessor costs are computed.");
			}
		}
		
		// add all broadcast variable inputs
		if (this.broadcastInputs != null) {
			for (NamedChannel nc : this.broadcastInputs) {
				Costs bcInputCost = nc.getSource().getCumulativeCostsShare();
				if (bcInputCost != null) {
					this.cumulativeCosts.addCosts(bcInputCost);
				} else {
					throw new CompilerException("Trying to set the costs of an operator before the broadcast input costs are computed.");
				}
			}
		}
	}
	
	public void setDegreeOfParallelism(int parallelism) {
		this.degreeOfParallelism = parallelism;
	}
	
	public void setSubtasksPerInstance(int subTasksPerInstance) {
		this.subtasksPerInstance = subTasksPerInstance;
	}
	
	public int getDegreeOfParallelism() {
		return this.degreeOfParallelism;
	}
	
	public int getSubtasksPerInstance() {
		return this.subtasksPerInstance;
	}
	
	public long getGuaranteedAvailableMemory() {
		return this.template.getMinimalMemoryAcrossAllSubTasks();
	}

	public Map<OptimizerNode, PlanNode> getBranchPlan() {
		return branchPlan;
	}
	
	// --------------------------------------------------------------------------------------------
	//                               Input, Predecessors, Successors
	// --------------------------------------------------------------------------------------------
	
	public abstract Iterator<Channel> getInputs();
	
	@Override
	public abstract Iterator<PlanNode> getPredecessors();
	
	/**
	 * Sets a list of all broadcast inputs attached to this node.
	 */
	public void setBroadcastInputs(List<NamedChannel> broadcastInputs) {
		if (broadcastInputs != null) {
			this.broadcastInputs = broadcastInputs;
			
			// update the branch map
			for (NamedChannel nc : broadcastInputs) {
				PlanNode source = nc.getSource();
				
				mergeBranchPlanMaps(branchPlan, source.branchPlan);
			}
		}
		
		// do a sanity check that if we are branching, we have now candidates for each branch point
		if (this.template.hasUnclosedBranches()) {
			if (this.branchPlan == null) {
				throw new CompilerException("Branching and rejoining logic did not find a candidate for the branching point.");
			}
	
			for (UnclosedBranchDescriptor uc : this.template.getOpenBranches()) {
				OptimizerNode brancher = uc.getBranchingNode();
				if (this.branchPlan.get(brancher) == null) {
					throw new CompilerException("Branching and rejoining logic did not find a candidate for the branching point.");
				}
			}
		}
	}
	
	/**
	 * Gets a list of all broadcast inputs attached to this node.
	 */
	public List<NamedChannel> getBroadcastInputs() {
		return this.broadcastInputs;
	}
	
	/**
	 * Adds a channel to a successor node to this node.
	 * 
	 * @param channel The channel to the successor.
	 */
	public void addOutgoingChannel(Channel channel) {
		this.outChannels.add(channel);
	}
	
	/**
	 * Gets a list of all outgoing channels leading to successors.
	 * 
	 * @return A list of all channels leading to successors.
	 */
	public List<Channel> getOutgoingChannels() {
		return this.outChannels;
	}
	
	
	// --------------------------------------------------------------------------------------------
	//                                Miscellaneous
	// --------------------------------------------------------------------------------------------
	
	public void updatePropertiesWithUniqueSets(Set<FieldSet> uniqueFieldCombinations) {
		if (uniqueFieldCombinations == null || uniqueFieldCombinations.isEmpty()) {
			return;
		}
		for (FieldSet fields : uniqueFieldCombinations) {
			this.globalProps.addUniqueFieldCombination(fields);
			this.localProps.addUniqueFields(fields);
		}
	}

	public PlanNode getCandidateAtBranchPoint(OptimizerNode branchPoint) {
		if(branchPlan == null){
			return null;
		}else{
			return this.branchPlan.get(branchPoint);
		}
	}
	
	/**
	 * Sets the pruning marker to true.
	 */
	public void setPruningMarker() {
		this.pFlag = true;
	}
	
	/**
	 * Checks whether the pruning marker was set.
	 * 
	 * @return True, if the pruning marker was set, false otherwise.
	 */
	public boolean isPruneMarkerSet() {
		return this.pFlag;
	}
	
	public boolean isOnDynamicPath() {
		return this.template.isOnDynamicPath();
	}
	
	public int getCostWeight() {
		return this.template.getCostWeight();
	}
	
	// --------------------------------------------------------------------------------------------
	
	public abstract SourceAndDamReport hasDamOnPathDownTo(PlanNode source);
	
	// --------------------------------------------------------------------------------------------
	

	@Override
	public String toString() {
		return this.template.getName() + " \"" + getPactContract().getName() + "\" : " + this.driverStrategy +
				" [[ " + this.globalProps + " ]] [[ " + this.localProps + " ]]";
	}
	
	// --------------------------------------------------------------------------------------------
	

	@Override
	public OptimizerNode getOptimizerNode() {
		return this.template;
	}


	@Override
	public PlanNode getPlanNode() {
		return this;
	}


	@Override
	public Iterator<DumpableConnection<PlanNode>> getDumpableInputs() {
		List<DumpableConnection<PlanNode>> allInputs = new ArrayList<DumpableConnection<PlanNode>>();
		
		for (Iterator<Channel> inputs = getInputs(); inputs.hasNext();) {
			allInputs.add(inputs.next());
		}
		
		for (NamedChannel c : getBroadcastInputs()) {
			allInputs.add(c);
		}
		
		return allInputs.iterator();
	}
	
	public static enum SourceAndDamReport {
		NOT_FOUND, FOUND_SOURCE, FOUND_SOURCE_AND_DAM;
	}
}