/***********************************************************************************************************************
*
* Copyright (C) 2010 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.pact.compiler.plan;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import eu.stratosphere.nephele.configuration.Configuration;
import eu.stratosphere.pact.common.contract.Contract;
import eu.stratosphere.pact.common.contract.MatchContract;
import eu.stratosphere.pact.common.contract.Order;
import eu.stratosphere.pact.common.contract.Ordering;
import eu.stratosphere.pact.common.util.FieldList;
import eu.stratosphere.pact.common.util.FieldSet;
import eu.stratosphere.pact.compiler.CompilerException;
import eu.stratosphere.pact.compiler.Costs;
import eu.stratosphere.pact.compiler.GlobalProperties;
import eu.stratosphere.pact.compiler.LocalProperties;
import eu.stratosphere.pact.compiler.PactCompiler;
import eu.stratosphere.pact.compiler.PartitionProperty;
import eu.stratosphere.pact.compiler.costs.CostEstimator;
import eu.stratosphere.pact.runtime.shipping.ShipStrategy;
import eu.stratosphere.pact.runtime.shipping.ShipStrategy.BroadcastSS;
import eu.stratosphere.pact.runtime.shipping.ShipStrategy.ForwardSS;
import eu.stratosphere.pact.runtime.shipping.ShipStrategy.PartitionHashSS;
import eu.stratosphere.pact.runtime.shipping.ShipStrategy.PartitionRangeSS;
import eu.stratosphere.pact.runtime.shipping.ShipStrategy.ShipStrategyType;
import eu.stratosphere.pact.runtime.task.util.TaskConfig.LocalStrategy;
/**
* The Optimizer representation of a <i>Match</i> contract node.
*
* @author Stephan Ewen (stephan.ewen@tu-berlin.de)
*/
public class MatchNode extends TwoInputNode {
/**
 * Creates a new MatchNode for the given contract.
 * <p>
 * The parameters of the contract are inspected for a local strategy hint
 * ({@link PactCompiler#HINT_LOCAL_STRATEGY}). A recognized hint fixes the local strategy of this
 * node; if no hint is present, the local strategy is set to {@link LocalStrategy#NONE}.
 *
 * @param pactContract
 *        The match contract object.
 * @throws CompilerException
 *         Thrown, if a hint is present but does not name a known local strategy.
 */
public MatchNode(MatchContract pactContract) {
    super(pactContract);

    final Configuration params = getPactContract().getParameters();
    final String hint = params.getString(PactCompiler.HINT_LOCAL_STRATEGY, null);

    // without a hint, the local strategy is left open
    if (hint == null) {
        setLocalStrategy(LocalStrategy.NONE);
        return;
    }

    // translate the hint into the local strategy it stands for
    final LocalStrategy hinted;
    if (PactCompiler.HINT_LOCAL_STRATEGY_SORT_BOTH_MERGE.equals(hint)) {
        hinted = LocalStrategy.SORT_BOTH_MERGE;
    } else if (PactCompiler.HINT_LOCAL_STRATEGY_SORT_FIRST_MERGE.equals(hint)) {
        hinted = LocalStrategy.SORT_FIRST_MERGE;
    } else if (PactCompiler.HINT_LOCAL_STRATEGY_SORT_SECOND_MERGE.equals(hint)) {
        hinted = LocalStrategy.SORT_SECOND_MERGE;
    } else if (PactCompiler.HINT_LOCAL_STRATEGY_MERGE.equals(hint)) {
        hinted = LocalStrategy.MERGE;
    } else if (PactCompiler.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST.equals(hint)) {
        hinted = LocalStrategy.HYBRIDHASH_FIRST;
    } else if (PactCompiler.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND.equals(hint)) {
        hinted = LocalStrategy.HYBRIDHASH_SECOND;
    } else if (PactCompiler.HINT_LOCAL_STRATEGY_INMEM_HASH_BUILD_FIRST.equals(hint)) {
        hinted = LocalStrategy.MMHASH_FIRST;
    } else if (PactCompiler.HINT_LOCAL_STRATEGY_INMEM_HASH_BUILD_SECOND.equals(hint)) {
        hinted = LocalStrategy.MMHASH_SECOND;
    } else if (PactCompiler.HINT_LOCAL_STRATEGY_SORT_SELF_NESTEDLOOP.equals(hint)) {
        hinted = LocalStrategy.SORT_SELF_NESTEDLOOP;
    } else if (PactCompiler.HINT_LOCAL_STRATEGY_SELF_NESTEDLOOP.equals(hint)) {
        hinted = LocalStrategy.SELF_NESTEDLOOP;
    } else {
        throw new CompilerException("Invalid local strategy hint for match contract: " + hint);
    }

    setLocalStrategy(hinted);
}
/**
 * Copy constructor that creates a copy of a node with different predecessors. The predecessors
 * are assumed to be of the same type as in the template node, and to be merely copies with
 * different strategies, as they are created in the process of the plan enumeration.
 * <p>
 * All arguments are handed unchanged to the corresponding constructor of the super class.
 *
 * @param template
 *        The node to create a copy of.
 * @param pred1
 *        The new predecessor for the first input.
 * @param pred2
 *        The new predecessor for the second input.
 * @param conn1
 *        The old connection of the first input to copy properties from.
 * @param conn2
 *        The old connection of the second input to copy properties from.
 * @param globalProps
 *        The global properties of this copy.
 * @param localProps
 *        The local properties of this copy.
 */
protected MatchNode(MatchNode template,
        OptimizerNode pred1, OptimizerNode pred2,
        PactConnection conn1, PactConnection conn2,
        GlobalProperties globalProps, LocalProperties localProps)
{
    super(template, pred1, pred2, conn1, conn2, globalProps, localProps);
}
// ------------------------------------------------------------------------
/**
 * Gets the contract object for this match node. The contract held by the super class is
 * returned with the more specific {@link MatchContract} type.
 *
 * @return The contract, cast to {@code MatchContract}.
 */
@Override
public MatchContract getPactContract() {
    final Contract contract = super.getPactContract();
    return (MatchContract) contract;
}
/**
 * Gets the name of this node type.
 *
 * @return The constant string {@code "Match"}.
 */
@Override
public String getName() {
    return "Match";
}
/**
 * Gets the number of memory consumers for the local strategy that is currently set on this node.
 *
 * @return {@code 2} for the strategies {@code SORT_BOTH_MERGE} and {@code SORT_SELF_NESTEDLOOP},
 *         {@code 1} for the other merge, hash and self-nested-loop strategies listed below,
 *         and {@code 0} for every other strategy.
 */
@Override
public int getMemoryConsumerCount() {
    switch (this.localStrategy) {
    case SORT_BOTH_MERGE:
    case SORT_SELF_NESTEDLOOP:
        return 2;

    case SORT_FIRST_MERGE:
    case SORT_SECOND_MERGE:
    case MERGE:
    case HYBRIDHASH_FIRST:
    case HYBRIDHASH_SECOND:
    case MMHASH_FIRST:
    case MMHASH_SECOND:
    case SELF_NESTEDLOOP:
        return 1;

    default:
        return 0;
    }
}
/**
 * Sets the inputs of this node. This implementation adds no behavior of its own and only
 * delegates to the implementation of the super class.
 *
 * @param contractToNode
 *        The map from contracts to their optimizer nodes, passed on to the super class.
 */
@Override
public void setInputs(Map<Contract, OptimizerNode> contractToNode) {
    super.setInputs(contractToNode);
}
/**
 * Gets the incoming connections of this node. The first input is always contained. The second
 * input is left out when the local strategy is one of the self-match strategies
 * ({@code SELF_NESTEDLOOP}, {@code SORT_SELF_NESTEDLOOP}).
 *
 * @return A new list with one or two connections.
 */
@Override
public List<PactConnection> getIncomingConnections() {
    final boolean selfMatch = this.localStrategy == LocalStrategy.SELF_NESTEDLOOP
        || this.localStrategy == LocalStrategy.SORT_SELF_NESTEDLOOP;

    final ArrayList<PactConnection> connections = new ArrayList<PactConnection>(2);
    connections.add(this.input1);
    if (!selfMatch) {
        // a regular match additionally reads from its second input
        connections.add(this.input2);
    }
    return connections;
}
/**
 * Computes the interesting properties for both inputs of this node and adds them to the
 * respective input connections.
 * <p>
 * For each input, the interesting properties of this node are first translated into properties
 * for that input. They are then extended by the properties the match itself is interested in:
 * (1) partitioning plus order on the key fields and (2) partitioning only.
 *
 * @param estimator
 *        The cost estimator used to compute the maximal costs of the interesting properties.
 */
@Override
public void computeInterestingPropertiesForInputs(CostEstimator estimator) {
    // interesting properties that reach this node from its successors
    final List<InterestingProperties> ownProps = getInterestingProperties();

    // translate them to each of the two inputs
    final List<InterestingProperties> forFirst =
        InterestingProperties.createInterestingPropertiesForInput(ownProps, this, 0);
    final List<InterestingProperties> forSecond =
        InterestingProperties.createInterestingPropertiesForInput(ownProps, this, 1);

    // add the properties the match itself cares about and publish them on the first input
    createInterestingProperties(this.input1, forFirst, estimator, 0);
    this.input1.addAllInterestingProperties(forFirst);

    // same for the second input
    createInterestingProperties(this.input2, forSecond, estimator, 1);
    this.input2.addAllInterestingProperties(forSecond);
}
/**
 * Utility method that generates for the given input interesting properties about partitioning and
 * order. Two interesting properties are merged into the target list:
 * <ol>
 *   <li>any partitioning on the key fields together with any order on the key columns, with the
 *       maximal costs being the hash partitioning cost plus the local sort cost, and</li>
 *   <li>any partitioning on the key fields only, with the hash partitioning cost as maximal cost.</li>
 * </ol>
 *
 * @param input
 *        The input to generate the interesting properties for.
 * @param target
 *        The list to add the interesting properties to.
 * @param estimator
 *        The cost estimator to estimate the maximal costs for the interesting properties.
 * @param inputNum
 *        The number of the input, {@code 0} for the first and {@code 1} for the second input.
 * @throws CompilerException
 *         Thrown, if {@code inputNum} is neither {@code 0} nor {@code 1}.
 */
private void createInterestingProperties(PactConnection input, List<InterestingProperties> target,
        CostEstimator estimator, int inputNum) {
    InterestingProperties p = new InterestingProperties();

    final FieldList keys;
    switch (inputNum) {
    case 0:
        keys = this.keySet1;
        break;
    case 1:
        keys = this.keySet2;
        break;
    default:
        // the exception was previously created but never thrown, which led to a
        // NullPointerException on the key set further below
        throw new CompilerException("Invalid input number "+inputNum+" for Match.");
    }

    // partition and any order
    p.getGlobalProperties().setPartitioning(PartitionProperty.ANY, (FieldList)keys.clone());
    Ordering ordering = new Ordering();
    for (Integer index : getPactContract().getKeyColumnNumbers(inputNum)) {
        ordering.appendOrdering(index, null, Order.ANY);
    }
    p.getLocalProperties().setOrdering(ordering);

    // maximal costs: hash partitioning plus a local sort
    estimator.getHashPartitioningCost(input, p.getMaximalCosts());
    Costs c = new Costs();
    estimator.getLocalSortCost(this, input, c);
    p.getMaximalCosts().addCosts(c);
    InterestingProperties.mergeUnionOfInterestingProperties(target, p);

    // partition only
    p = new InterestingProperties();
    p.getGlobalProperties().setPartitioning(PartitionProperty.ANY, (FieldList)keys.clone());
    estimator.getHashPartitioningCost(input, p.getMaximalCosts());
    InterestingProperties.mergeUnionOfInterestingProperties(target, p);
}
/**
* Enumerates candidate plans for every combination of a sub-plan alternative of the first input with a
* sub-plan alternative of the second input. Combinations that are not branch compatible are skipped.
* <p>
* For each remaining combination, the shipping strategies are determined from the strategies currently set
* on the two input connections of this node. A strategy of type {@code NONE} means the strategy is free to
* choose, any other type is treated as fixed. Four situations are distinguished: both free, only the first
* free, only the second free, and both fixed. The candidates themselves are created and added to
* {@code outputPlans} by {@code createLocalAlternatives}.
*
* @param altSubPlans1
*        The alternative sub-plans for the first input.
* @param altSubPlans2
*        The alternative sub-plans for the second input.
* @param estimator
*        The cost estimator, passed on to the creation of the alternatives.
* @param outputPlans
*        The list that the generated candidate plans are added to.
*
* @see eu.stratosphere.pact.compiler.plan.TwoInputNode#computeValidPlanAlternatives(java.util.List, java.util.List, eu.stratosphere.pact.compiler.costs.CostEstimator, java.util.List)
*/
@Override
protected void computeValidPlanAlternatives(List<? extends OptimizerNode> altSubPlans1,
List<? extends OptimizerNode> altSubPlans2, CostEstimator estimator, List<OptimizerNode> outputPlans)
{
for(OptimizerNode subPlan1 : altSubPlans1) {
for(OptimizerNode subPlan2 : altSubPlans2) {
// check, whether the two children have the same
// sub-plan in the common part before the branches
if (!areBranchCompatible(subPlan1, subPlan2)) {
continue;
}
// start from the strategies set on the connections; they are re-read for every combination,
// because the local variables are overwritten below
ShipStrategy ss1 = this.input1.getShipStrategy();
ShipStrategy ss2 = this.input2.getShipStrategy();
// NOTE: the self-match handling below is disabled (commented out) and refers to
// ShipStrategy constants that are not used anywhere in the active code of this method
// check for self match
// if (areBranchesEqual(subPlan1, subPlan2)) {
// // we have a self match
//
// if(ss1 != ShipStrategy.NONE && ss2 != ShipStrategy.NONE && ss1.equals(ss2)) {
// // ShipStrategy is forced on both inputs
// createLocalAlternatives(outputPlans, subPlan1, subPlan2, ss1, ss1, estimator);
// } else if (ss1 != ShipStrategy.NONE && ss2 == ShipStrategy.NONE) {
// // ShipStrategy is forced on first input
// createLocalAlternatives(outputPlans, subPlan1, subPlan2, ss1, ss1, estimator);
// } else if (ss1 == ShipStrategy.NONE && ss2 != ShipStrategy.NONE) {
// // ShipStrategy is forced on second input
// createLocalAlternatives(outputPlans, subPlan1, subPlan2, ss2, ss2, estimator);
// } else if(ss1 != ShipStrategy.NONE && ss2 != ShipStrategy.NONE && !ss1.equals(ss2)) {
// // incompatible ShipStrategies enforced
// continue;
// }
//
// GlobalProperties gp;
// gp = subPlan1.getGlobalProperties();
//
// if(!partitioningIsOnRightFields(gp, 0) || gp.getPartitioning().equals(PartitionProperty.NONE)) {
// // we need to partition
// // TODO: include range partitioning
// createLocalAlternatives(outputPlans, subPlan1, subPlan2, ShipStrategy.PARTITION_HASH, ShipStrategy.PARTITION_HASH, estimator);
// } else {
// // input is already partitioned
// createLocalAlternatives(outputPlans, subPlan1, subPlan2, ShipStrategy.FORWARD, ShipStrategy.FORWARD, estimator);
// }
//
// // check next alternative
// continue;
// }
GlobalProperties gp1;
GlobalProperties gp2;
// test which degree of freedom we have in choosing the shipping strategies
// some may be fixed a priori by compiler hints
if (ss1.type() == ShipStrategyType.NONE) {
// the first connection is free to choose for the compiler
gp1 = subPlan1.getGlobalPropertiesForParent(this);
if (ss2.type() == ShipStrategyType.NONE) {
// case: both are free to choose
gp2 = subPlan2.getGlobalPropertiesForParent(this);
// test, if one side is pre-partitioned
// if that is the case, partitioning the other side accordingly is
// the cheapest thing to do
if (partitioningIsOnRightFields(gp1, 0) && gp1.getPartitioning().isComputablyPartitioned()) {
ss1 = new ForwardSS();
}
if (partitioningIsOnRightFields(gp2, 1) && gp2.getPartitioning().isComputablyPartitioned()) {
// input is partitioned
// check, whether that partitioning is the same as the one of input one!
if (!partitioningIsOnRightFields(gp1, 0) || !gp1.getPartitioning().isComputablyPartitioned()) {
ss2 = new ForwardSS();
}
else {
if (gp1.getPartitioning().isCompatibleWith(gp2.getPartitioning()) &&
partitioningIsOnSameSubkey(gp1.getPartitionedFields(),gp2.getPartitionedFields())) {
ss2 = new ForwardSS();
} else {
// both sides are partitioned, but in an incompatible way
// two kinds of alternatives are generated here:
// 1) re-partition 2 the same way as 1
// 2) re-partition 1 the same way as 2
// NOTE(review): the original comment announced three alternatives, but only these two
// are implemented
if (gp1.getPartitioning() == PartitionProperty.HASH_PARTITIONED) {
createLocalAlternatives(outputPlans, subPlan1, subPlan2, new ForwardSS(),
new PartitionHashSS(this.keySet2), estimator);
} else if (gp1.getPartitioning() == PartitionProperty.RANGE_PARTITIONED) {
createLocalAlternatives(outputPlans, subPlan1, subPlan2, new ForwardSS(),
new PartitionRangeSS(this.keySet2), estimator);
}
if (gp2.getPartitioning() == PartitionProperty.HASH_PARTITIONED) {
createLocalAlternatives(outputPlans, subPlan1, subPlan2, new PartitionHashSS(this.keySet1),
new ForwardSS(), estimator);
} else if (gp2.getPartitioning() == PartitionProperty.RANGE_PARTITIONED) {
createLocalAlternatives(outputPlans, subPlan1, subPlan2, new PartitionRangeSS(this.keySet1),
new ForwardSS(), estimator);
}
// do not go through the remaining logic of the loop!
continue;
}
}
}
// at this point, ss1 / ss2 are of type FORWARD exactly where the code above found a usable
// pre-existing partitioning; otherwise they still have the type NONE
// create the alternative nodes. the strategies to create depend on the different
// combinations of pre-existing partitions
if (ss1.type() == ShipStrategyType.FORWARD) {
if (ss2.type() == ShipStrategyType.FORWARD) {
// both are equally pre-partitioned
// we need not use any special shipping step
createLocalAlternatives(outputPlans, subPlan1, subPlan2, ss1, ss2, estimator);
// we create an additional plan with a range partitioning
// if this is not already a range partitioning
if (gp1.getPartitioning() != PartitionProperty.RANGE_PARTITIONED) {
createLocalAlternatives(outputPlans, subPlan1, subPlan2, new PartitionRangeSS(this.keySet1),
new PartitionRangeSS(this.keySet2), estimator);
}
} else {
// input 1 is local-forward
// add two plans:
// 1) make input 2 the same partitioning as input 1
// 2) partition both inputs with a different partitioning function (hash <-> range)
// (for a hash partitioned input 1, the second plan is currently commented out)
if (partitioningIsOnRightFields(gp1, 0) && gp1.getPartitioning() == PartitionProperty.HASH_PARTITIONED) {
createLocalAlternatives(outputPlans, subPlan1, subPlan2, ss1,
new PartitionHashSS(this.keySet2), estimator);
// createLocalAlternatives(outputPlans, predList1, predList2, ShipStrategy.PARTITION_RANGE,
// ShipStrategy.PARTITION_RANGE, estimator);
} else if (partitioningIsOnRightFields(gp1, 0) && gp1.getPartitioning() == PartitionProperty.RANGE_PARTITIONED) {
createLocalAlternatives(outputPlans, subPlan1, subPlan2, ss1,
new PartitionRangeSS(this.keySet2), estimator);
createLocalAlternatives(outputPlans, subPlan1, subPlan2, new PartitionHashSS(this.keySet1),
new PartitionHashSS(this.keySet2), estimator);
} else {
// a partitioning that is neither hash nor range cannot be adopted for the other input
throw new CompilerException("Invalid partitioning property for input 1 of match '"
+ getPactContract().getName() + "'.");
}
}
} else if (ss2.type() == ShipStrategyType.FORWARD) {
// input 2 is local-forward
// add two plans:
// 1) make input 1 the same partitioning as input 2
// 2) partition both inputs with a different partitioning function (hash <-> range)
// (for a hash partitioned input 2, the second plan is currently commented out)
if (partitioningIsOnRightFields(gp2, 1) && gp2.getPartitioning() == PartitionProperty.HASH_PARTITIONED) {
createLocalAlternatives(outputPlans, subPlan1, subPlan2, new PartitionHashSS(this.keySet1), ss2,
estimator);
// createLocalAlternatives(outputPlans, predList1, predList2, ShipStrategy.PARTITION_RANGE,
// ShipStrategy.PARTITION_RANGE, estimator);
} else if (partitioningIsOnRightFields(gp2, 1) && gp2.getPartitioning() == PartitionProperty.RANGE_PARTITIONED) {
createLocalAlternatives(outputPlans, subPlan1, subPlan2, new PartitionRangeSS(this.keySet1), ss2,
estimator);
createLocalAlternatives(outputPlans, subPlan1, subPlan2, new PartitionHashSS(this.keySet1),
new PartitionHashSS(this.keySet2), estimator);
} else {
throw new CompilerException("Invalid partitioning property for input 2 of match '"
+ getPactContract().getName() + "'.");
}
} else {
// all of the shipping strategies are free to choose.
// none has a pre-existing partitioning. create the options:
// 1) re-partition both by hash
// 2) re-partition both by range (currently commented out)
// 3) broadcast the first input (forward the second)
// 4) broadcast the second input (forward the first)
createLocalAlternatives(outputPlans, subPlan1, subPlan2, new PartitionHashSS(this.keySet1),
new PartitionHashSS(this.keySet2), estimator);
// createLocalAlternatives(outputPlans, pred1, pred2, ShipStrategy.PARTITION_RANGE,
// ShipStrategy.PARTITION_RANGE, estimator);
// add the broadcasting strategies only, if the sizes of both inputs can be estimated
if (haveValidOutputEstimates(subPlan1) && haveValidOutputEstimates(subPlan2)) {
createLocalAlternatives(outputPlans, subPlan1, subPlan2, new BroadcastSS(),
new ForwardSS(), estimator);
createLocalAlternatives(outputPlans, subPlan1, subPlan2, new ForwardSS(),
new BroadcastSS(), estimator);
}
}
} else {
// gp1 still holds the properties obtained from subPlan1 above, gp2 reflects the fixed strategy ss2
gp2 = PactConnection.getGlobalPropertiesAfterConnection(subPlan2, this, 1, ss2);
// first connection free to choose, but second one is fixed
// 1) input 2 is broadcast -> other side must be forward
// 2) input 2 is forward -> other side must be broadcast, or repartitioned, if the forwarded
// side is partitioned
// 3) input 2 is hash-partition -> other side must be re-partition by hash as well
// 4) input 2 is range-partition -> other side must be re-partition by range as well
switch (ss2.type()) {
case BROADCAST:
ss1 = new ForwardSS();;
break;
case FORWARD:
if (partitioningIsOnRightFields(gp2, 1) && gp2.getPartitioning().isPartitioned()) {
// adapt to the partitioning
if (gp2.getPartitioning() == PartitionProperty.HASH_PARTITIONED) {
ss1 = new PartitionHashSS(this.keySet1);
} else if (gp2.getPartitioning() == PartitionProperty.RANGE_PARTITIONED) {
ss1 = new PartitionRangeSS(this.keySet1);
} else {
throw new CompilerException();
}
} else {
// must broadcast
ss1 = new BroadcastSS();
}
break;
case PARTITION_HASH:
// NOTE(review): this compares the partitioned fields of input 1 against keySet2 and does not call
// partitioningIsOnRightFields(gp1, 0), whereas the mirrored case for a fixed first input below
// uses keySet1 together with partitioningIsOnRightFields(gp2, 1) - confirm that this asymmetry
// is intended
ss1 = (partitioningIsOnSameSubkey(gp1.getPartitionedFields(), this.keySet2) && gp1.getPartitioning() == PartitionProperty.HASH_PARTITIONED) ? new ForwardSS()
: new PartitionHashSS(this.keySet1);
break;
case PARTITION_RANGE:
ss1 = (partitioningIsOnRightFields(gp1, 0) && gp1.getPartitioning() == PartitionProperty.RANGE_PARTITIONED) ? new ForwardSS()
: new PartitionRangeSS(this.keySet1);
break;
default:
throw new CompilerException("Invalid fixed shipping strategy '" + ss2.name()
+ "' for match contract '" + getPactContract().getName() + "'.");
}
createLocalAlternatives(outputPlans, subPlan1, subPlan2, ss1, ss2, estimator);
}
} else if (ss2.type() == ShipStrategyType.NONE) {
// second connection free to choose, but first one is fixed
gp1 = PactConnection.getGlobalPropertiesAfterConnection(subPlan1, this, 0, ss1);
gp2 = subPlan2.getGlobalPropertiesForParent(this);
// 1) input 1 is broadcast -> other side must be forward
// 2) input 1 is forward -> other side must be broadcast, if forwarded side is not partitioned
// 3) input 1 is hash-partition -> other side must be re-partition by hash as well
// 4) input 1 is range-partition -> other side must be re-partition by range as well
switch (ss1.type()) {
case BROADCAST:
ss2 = new ForwardSS();;
break;
case FORWARD:
if (partitioningIsOnRightFields(gp1, 0) && gp1.getPartitioning().isPartitioned()) {
// adapt to the partitioning
if (gp1.getPartitioning() == PartitionProperty.HASH_PARTITIONED) {
ss2 = new PartitionHashSS(this.keySet2);
} else if (gp1.getPartitioning() == PartitionProperty.RANGE_PARTITIONED) {
ss2 = new PartitionRangeSS(this.keySet2);
} else {
throw new CompilerException();
}
} else {
// must broadcast
ss2 = new BroadcastSS();
}
break;
case PARTITION_HASH:
// forward input 2 only if it is already hash partitioned in a way that fits the keys of input 1
ss2 = (partitioningIsOnSameSubkey(this.keySet1, gp2.getPartitionedFields()) && partitioningIsOnRightFields(gp2, 1) && gp2.getPartitioning() == PartitionProperty.HASH_PARTITIONED) ? new ForwardSS()
: new PartitionHashSS(this.keySet2);
break;
case PARTITION_RANGE:
ss2 = (partitioningIsOnRightFields(gp2, 1) && gp2.getPartitioning() == PartitionProperty.RANGE_PARTITIONED) ? new ForwardSS()
: new PartitionRangeSS(this.keySet2);
break;
default:
throw new CompilerException("Invalid fixed shipping strategy '" + ss1.name()
+ "' for match contract '" + getPactContract().getName() + "'.");
}
createLocalAlternatives(outputPlans, subPlan1, subPlan2, ss1, ss2, estimator);
} else {
// both are fixed
// check, if they produce a valid plan
if ((ss1.type() == ShipStrategyType.BROADCAST && ss2.type() != ShipStrategyType.BROADCAST)
|| (ss1.type() != ShipStrategyType.BROADCAST && ss2.type() == ShipStrategyType.BROADCAST)) {
// the broadcast / not-broadcast combinations are legal
createLocalAlternatives(outputPlans, subPlan1, subPlan2, ss1, ss2, estimator);
} else {
// they need to have an equal partitioning
gp1 = PactConnection.getGlobalPropertiesAfterConnection(subPlan1, this, 0, ss1);
gp2 = PactConnection.getGlobalPropertiesAfterConnection(subPlan2, this, 1, ss2);
if (gp1.getPartitioning().isComputablyPartitioned() && gp1.getPartitioning() == gp2.getPartitioning() &&
partitioningIsOnSameSubkey(gp1.getPartitionedFields(), gp2.getPartitionedFields())) {
// partitioning there and equal
createLocalAlternatives(outputPlans, subPlan1, subPlan2, ss1, ss2, estimator);
} else {
// no valid plan possible with that combination of shipping strategies and pre-existing
// properties; no candidate is added for this combination
continue;
}
}
}
}
}
}
/**
 * Private utility method that generates the alternative Match nodes, given fixed shipping strategies
 * for the inputs.
 * <p>
 * If the local strategy of this node is fixed (not {@code NONE}), a candidate is created for
 * exactly that strategy. Otherwise, the hybrid-hash strategies are created if both sub-plans have
 * valid output estimates, and in addition one sort-merge strategy is created, chosen by which of
 * the inputs already has an ascending order on its key columns. The self-match strategies are only
 * created when they are explicitly set as the fixed local strategy.
 *
 * @param target
 *        The list to put the alternatives in.
 * @param subPlan1
 *        The predecessor node for the first input.
 * @param subPlan2
 *        The predecessor node for the second input.
 * @param ss1
 *        The shipping strategy for the first input.
 * @param ss2
 *        The shipping strategy for the second input.
 * @param estimator
 *        The cost estimator.
 */
private void createLocalAlternatives(List<OptimizerNode> target, OptimizerNode subPlan1, OptimizerNode subPlan2,
        ShipStrategy ss1, ShipStrategy ss2, CostEstimator estimator)
{
    // local properties of the incoming data after the respective shipping step
    final LocalProperties lp1 = PactConnection.getLocalPropertiesAfterConnection(subPlan1, this, ss1);
    final LocalProperties lp2 = PactConnection.getLocalPropertiesAfterConnection(subPlan2, this, ss2);

    final LocalStrategy fixed = getLocalStrategy();

    if (fixed != LocalStrategy.NONE) {
        // the local strategy is fixed, create the candidate for exactly this strategy
        final boolean mergeBased = fixed == LocalStrategy.SORT_BOTH_MERGE
            || fixed == LocalStrategy.SORT_FIRST_MERGE
            || fixed == LocalStrategy.SORT_SECOND_MERGE
            || fixed == LocalStrategy.MERGE;
        final boolean hashBased = fixed == LocalStrategy.HYBRIDHASH_FIRST
            || fixed == LocalStrategy.HYBRIDHASH_SECOND
            || fixed == LocalStrategy.MMHASH_FIRST
            || fixed == LocalStrategy.MMHASH_SECOND;

        if (mergeBased) {
            createMatchAlternative(target, subPlan1, subPlan2, ss1, ss2, fixed, Order.ASCENDING, true, null,
                estimator);
        }
        else if (hashBased) {
            createMatchAlternative(target, subPlan1, subPlan2, ss1, ss2, fixed, Order.NONE, false, null,
                estimator);
        }
        else if (fixed == LocalStrategy.SORT_SELF_NESTEDLOOP) {
            // self match with sorting, only the first input is used
            createMatchAlternative(target, subPlan1, null, ss1, null, fixed, Order.ASCENDING, true, null,
                estimator);
        }
        else if (fixed == LocalStrategy.SELF_NESTEDLOOP) {
            // self match without sorting, ordering and grouping of the first input are handed on
            final LocalProperties passedOn = new LocalProperties();
            passedOn.setOrdering(lp1.getOrdering());
            passedOn.setGrouped(true, lp1.getGroupedFields());
            createMatchAlternative(target, subPlan1, null, ss1, null, fixed, Order.ANY, true, passedOn,
                estimator);
        }
        return;
    }

    // the local strategy is free to choose

    // create the hash strategies only, if we have estimates for the input sizes
    if (haveValidOutputEstimates(subPlan1) && haveValidOutputEstimates(subPlan2)) {
        // hybrid-hash with the first input as the building side
        createMatchAlternative(target, subPlan1, subPlan2, ss1, ss2, LocalStrategy.HYBRIDHASH_FIRST,
            Order.NONE, false, null, estimator);
        // hybrid-hash with the second input as the building side
        createMatchAlternative(target, subPlan1, subPlan2, ss1, ss2, LocalStrategy.HYBRIDHASH_SECOND,
            Order.NONE, false, null, estimator);
    }

    // the orderings a merge requires on the key columns of the two inputs
    final Ordering required1 = new Ordering();
    for (int column : getPactContract().getKeyColumnNumbers(0)) {
        required1.appendOrdering(column, null, Order.ASCENDING);
    }
    final Ordering required2 = new Ordering();
    for (int column : getPactContract().getKeyColumnNumbers(1)) {
        required2.appendOrdering(column, null, Order.ASCENDING);
    }

    // pick the sort-merge variant that sorts only the inputs lacking the required order
    final boolean firstSorted = required1.isMetBy(lp1.getOrdering());
    final boolean secondSorted = required2.isMetBy(lp2.getOrdering());

    final LocalStrategy mergeStrategy;
    if (firstSorted) {
        mergeStrategy = secondSorted ? LocalStrategy.MERGE : LocalStrategy.SORT_SECOND_MERGE;
    } else {
        mergeStrategy = secondSorted ? LocalStrategy.SORT_FIRST_MERGE : LocalStrategy.SORT_BOTH_MERGE;
    }
    createMatchAlternative(target, subPlan1, subPlan2, ss1, ss2, mergeStrategy, Order.ASCENDING, true, null,
        estimator);
}
/**
 * If we have multiple connections for both inputs, the branches are equal if all predecessors
 * per input are the same as for the other input, i.e., if the two collections of predecessors
 * contain the same elements the same number of times (the order does not matter).
 *
 * E.g.:
 * allPreds1 = { A, B, C } == allPreds2 = { B, A, C }
 * allPreds1 = { A, B, C } != allPreds2 = { A, B }
 * allPreds1 = { A, B, C } != allPreds2 = { A, B, C, D }
 * allPreds1 = { A, B, C } != allPreds2 = { A, B, E }
 *
 * Elements are compared via {@code equals}. Neither of the given lists is modified.
 *
 * @param allPreds1 All predecessors of the first input.
 * @param allPreds2 All predecessors of the second input.
 *
 * @return {@code true} if branches are equal, {@code false} otherwise.
 */
@SuppressWarnings("unused")
private boolean areBranchesEqual(List<OptimizerNode> allPreds1, List<OptimizerNode> allPreds2) {
    // lists of different size can never be equal; without this check, a second list that
    // is a strict superset of the first one was wrongly reported as equal
    if (allPreds1.size() != allPreds2.size()) {
        return false;
    }

    // work on a copy, so that every element of the second list is matched at most once
    final List<OptimizerNode> unmatched = new ArrayList<OptimizerNode>(allPreds2);
    for (OptimizerNode node : allPreds1) {
        if (!unmatched.remove(node)) {
            // no remaining counterpart in the second list
            return false;
        }
    }

    // equal sizes and every element matched: nothing can be left over
    return true;
}
/**
* Private utility method that generates a candidate Match node, given fixed shipping strategies and a fixed
* local strategy.
*
* @param target
* The list to put the alternatives in.
* @param subPlan1
* The predecessor node for the first input.
* @param subPlan2
* The predecessor node for the second input.
* @param ss1
* The shipping strategy for the first input.
* @param ss2
* The shipping strategy for the second input.
* @param ls
* The local strategy.
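* @param order
* The order that is set on the key fields in the local properties of the output, if no explicit
* local properties are given. With {@code Order.NONE}, no ordering is set.
* @param grouped
* The flag that is set as the grouping property on the key fields, if no explicit local properties
* are given.
* @param outLpp
* The local properties of the data that goes to the user function, or {@code null} to derive
* them from {@code order} and {@code grouped}.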
* @param estimator
* The cost estimator.
*/
private void createMatchAlternative(List<OptimizerNode> target, OptimizerNode subPlan1, OptimizerNode subPlan2,
ShipStrategy ss1, ShipStrategy ss2, LocalStrategy ls, Order order, boolean grouped, LocalProperties outLpp,
CostEstimator estimator) {
// TODO: check this function. Why are two alternatives generated with different local properties?!?
// no self match
if(ls != LocalStrategy.SELF_NESTEDLOOP && ls != LocalStrategy.SORT_SELF_NESTEDLOOP) {
// compute the given properties of the incoming data
GlobalProperties gp1 = PactConnection.getGlobalPropertiesAfterConnection(subPlan1, this, 0, ss1);
GlobalProperties gp2 = PactConnection.getGlobalPropertiesAfterConnection(subPlan2, this, 1, ss2);
int[] scrambledKeyOrder1 = null;
int[] scrambledKeyOrder2 = null;
// check if input 1 is already partitioned and prepare an identical partitioning for input 2
if (ss1.type() == ShipStrategyType.FORWARD && ss2.type() == ShipStrategyType.PARTITION_HASH) {
// determine the key order used for the existing partitioning on input 1
scrambledKeyOrder1 = getScrambledKeyOrder(this.keySet1, gp1.getPartitionedFields());
// scramble key order for the input 2 that needs to be partitioned
if (scrambledKeyOrder1 != null) {
FieldList scrambledKeys2 = new FieldList();
for (int i = 0; i < scrambledKeyOrder1.length; i++) {
scrambledKeys2.add(this.keySet2.get(scrambledKeyOrder1[i]));
}
gp2.setPartitioning(gp2.getPartitioning(), scrambledKeys2);
ss2 = new PartitionHashSS(scrambledKeys2);
}
}
// check if input 2 is already partitioned and prepare an identical partitioning for input 1
if (ss2.type() == ShipStrategyType.FORWARD && ss1.type() == ShipStrategyType.PARTITION_HASH) {
// determine the key order used for the existing partitioning on input 2
scrambledKeyOrder2 = getScrambledKeyOrder(this.keySet2, gp2.getPartitionedFields());
// scramble key order for input 2 that needs to be partitioned
if (scrambledKeyOrder2 != null) {
FieldList scrambledKeys1 = new FieldList();
for (int i = 0; i < scrambledKeyOrder2.length; i++) {
scrambledKeys1.add(this.keySet1.get(scrambledKeyOrder2[i]));
}
gp1.setPartitioning(gp1.getPartitioning(), scrambledKeys1);
ss1 = new PartitionHashSS(scrambledKeys1);
}
}
LocalProperties outLp = outLpp;
// determine the properties of the data before it goes to the user code
GlobalProperties outGp = new GlobalProperties();
outGp.setPartitioning(gp1.getPartitioning(), gp1.getPartitionedFields());
outGp.setOrdering(gp1.getOrdering());
if (outLpp == null) {
outLp = new LocalProperties();
if (order != Order.NONE) {
Ordering ordering = new Ordering();
for (int keyColumn : this.keySet1) {
ordering.appendOrdering(keyColumn, null, order);
}
outLp.setOrdering(ordering);
}
else {
outLp.setOrdering(null);
}
outLp.setGrouped(grouped, new FieldSet(this.keySet1));
}
// create a new match node for this input
MatchNode n = new MatchNode(this, subPlan1, subPlan2, this.input1, this.input2, outGp, outLp);
n.input1.setShipStrategy(ss1);
n.input2.setShipStrategy(ss2);
n.setLocalStrategy(ls);
// compute, which of the properties survive, depending on the output contract
n.getGlobalProperties().filterByNodesConstantSet(this, 0);
n.getLocalProperties().filterByNodesConstantSet(this, 0);
// compute the costs
estimator.costOperator(n);
target.add(n);
// determine the properties of the data before it goes to the user code
outGp = new GlobalProperties();
outGp.setPartitioning(gp2.getPartitioning(), gp2.getPartitionedFields());
outGp.setOrdering(gp2.getOrdering());
if (outLpp == null) {
outLp = new LocalProperties();
if (order != Order.NONE) {
Ordering ordering = new Ordering();
for (int keyColumn : this.keySet2) {
ordering.appendOrdering(keyColumn, null, order);
}
outLp.setOrdering(ordering);
}
else {
outLp.setOrdering(null);
}
outLp.setGrouped(grouped, new FieldSet(this.keySet2));
}
// create a new match node for this input
n = new MatchNode(this, subPlan1, subPlan2, input1, input2, outGp, outLp);
n.input1.setShipStrategy(ss1);
n.input2.setShipStrategy(ss2);
n.setLocalStrategy(ls);
// compute, which of the properties survive, depending on the output contract
n.getGlobalProperties().filterByNodesConstantSet(this, 1);
n.getLocalProperties().filterByNodesConstantSet(this, 1);
// compute the costs
estimator.costOperator(n);
target.add(n);
} else {
// self match
GlobalProperties gp1 = PactConnection.getGlobalPropertiesAfterConnection(subPlan1, this, 0, ss1);
// determine the properties of the data before it goes to the user code
GlobalProperties outGp = new GlobalProperties();
outGp.setPartitioning(gp1.getPartitioning(), gp1.getPartitionedFields());
outGp.setOrdering(gp1.getOrdering());
LocalProperties outLp = null;
if (outLpp == null) {
outLp = new LocalProperties();
if (order != Order.NONE) {
Ordering ordering = new Ordering();
for (int keyColumn : this.keySet1) {
ordering.appendOrdering(keyColumn, null, order);
}
outLp.setOrdering(ordering);
}
else {
outLp.setOrdering(null);
}
outLp.setGrouped(grouped, new FieldSet(this.keySet1));
}
// create a new match node for this input
MatchNode n = new MatchNode(this, subPlan1, null, this.input1, null, outGp, outLp);
n.input1.setShipStrategy(ss1);
// n.input2.setShipStrategy(ss2);
n.setLocalStrategy(ls);
// compute, which of the properties survive, depending on the output contract
n.getGlobalProperties().filterByNodesConstantSet(this, 0);
n.getLocalProperties().filterByNodesConstantSet(this, 0);
// compute the costs
estimator.costOperator(n);
target.add(n);
}
}
/**
 * Computes the number of keys that are processed by the PACT.
 * <p>
 * Match processes only keys that appear in both inputs, so the smaller of the two
 * key cardinalities is used as an upper bound. A cardinality of -1 is treated as unknown.
 *
 * @return the estimated (upper bound) number of keys processed by the PACT,
 *         or -1 if the key cardinality of both inputs is unknown.
 */
protected long computeNumberOfProcessedKeys() {
	final long numKey1 = this.getFirstPredNode().getEstimatedCardinality(new FieldSet(this.keySet1));
	final long numKey2 = this.getSecondPredNode().getEstimatedCardinality(new FieldSet(this.keySet2));

	if (numKey1 == -1 && numKey2 == -1) {
		// both key cardinalities unknown (the sentinel for "unknown" is -1 on both sides)
		return -1;
	} else if (numKey1 == -1) {
		// key cardinality of 1st input unknown: use key cardinality of 2nd input as upper bound
		return numKey2;
	} else if (numKey2 == -1) {
		// key cardinality of 2nd input unknown: use key cardinality of 1st input as upper bound
		return numKey1;
	} else {
		// key cardinality of both inputs known: use the minimum as upper bound
		return Math.min(numKey1, numKey2);
	}
}
/**
 * Computes the number of stub calls for one processed key.
 * <p>
 * For each input whose key cardinality is known (i.e. not -1), the average number of
 * records per key is {@code numRecords / numKeys}. The result is the product of these
 * averages over the inputs with known key cardinality.
 * <p>
 * NOTE(review): the record counts are not checked against -1 and the key cardinalities are
 * not checked against 0 here - confirm that the estimates upstream rule these cases out.
 *
 * @return the number of stub calls for one processed key, or -1 if the key cardinality
 *         of both inputs is unknown.
 */
protected double computeStubCallsPerProcessedKey() {
	final long keys1 = this.getFirstPredNode().getEstimatedCardinality(new FieldSet(this.keySet1));
	final long records1 = this.getFirstPredNode().getEstimatedNumRecords();
	final long keys2 = this.getSecondPredNode().getEstimatedCardinality(new FieldSet(this.keySet2));
	final long records2 = this.getSecondPredNode().getEstimatedNumRecords();

	final boolean keys1Known = keys1 != -1;
	final boolean keys2Known = keys2 != -1;

	if (!keys1Known && !keys2Known) {
		return -1;
	}

	// an input with unknown key cardinality contributes the neutral factor 1
	final double recordsPerKey1 = keys1Known ? (double) records1 / keys1 : 1;
	final double recordsPerKey2 = keys2Known ? (double) records2 / keys2 : 1;

	return recordsPerKey1 * recordsPerKey2;
}
/**
 * Computes the number of stub calls.
 * <p>
 * The estimate is the number of processed keys multiplied by the number of stub calls
 * per processed key, truncated to a {@code long}.
 *
 * @return the number of stub calls, or -1 if either of the two factors is -1.
 */
protected long computeNumberOfStubCalls() {
	final long keyCount = this.computeNumberOfProcessedKeys();
	final double callsPerKey = this.computeStubCallsPerProcessedKey();

	// without both estimates no number of stub calls can be derived
	if (keyCount == -1 || callsPerKey == -1) {
		return -1;
	}

	return (long) (keyCount * callsPerKey);
}
/**
 * Checks whether the partitioning described by the given global properties is on fields
 * that are usable for the given input of this Match.
 * <p>
 * For a range partitioning, the partitioned fields must be equal to the key fields of the
 * input. For any other partitioning, every partitioned field must be one of the key fields
 * of the input.
 * <p>
 * TODO move to PartitionProperties (and change them from enum to class)
 *
 * @param gp the global properties whose partitioning is checked.
 * @param inputNum the input to check against: 0 for the first input, 1 for the second input.
 * @return true if the partitioned fields fit the key fields of the input as described above,
 *         false otherwise and whenever there are no partitioned fields.
 * @throws CompilerException if there are partitioned fields and {@code inputNum} is neither 0 nor 1.
 */
public boolean partitioningIsOnRightFields(GlobalProperties gp, int inputNum) {
	final FieldList partitionedFields = gp.getPartitionedFields();
	if (partitionedFields == null || partitionedFields.size() == 0) {
		return false;
	}

	final FieldList keyFields;
	if (inputNum == 0) {
		keyFields = this.keySet1;
	} else if (inputNum == 1) {
		keyFields = this.keySet2;
	} else {
		throw new CompilerException("Invalid input number "+inputNum+" for Match.");
	}

	if (gp.getPartitioning() == PartitionProperty.RANGE_PARTITIONED) {
		return keyFields.equals(partitionedFields);
	}

	// every partitioned field has to show up among the key fields
	nextPartitionedField:
	for (int partitionedField : partitionedFields) {
		for (int keyField : keyFields) {
			if (keyField == partitionedField) {
				continue nextPartitionedField;
			}
		}
		return false;
	}
	return true;
}
/**
 * Checks whether two partitionings, one per input, are on corresponding sub keys, i.e.
 * whether for every position {@code i} the field {@code subkey1.get(i)} of the first input
 * and the field {@code subkey2.get(i)} of the second input sit at the same position of
 * the respective key ({@code keySet1} / {@code keySet2}).
 * <p>
 * TODO move to PartitionProperties (and change them from enum to class)
 *
 * @param subkey1 the partitioned fields of the first input, may be null.
 * @param subkey2 the partitioned fields of the second input, may be null.
 * @return true if both sub keys are null or describe the same key positions in the same
 *         order; false if only one is null, the sizes differ, or the positions differ.
 * @throws RuntimeException if a field of {@code subkey1} is not contained in the key of the first input.
 */
public boolean partitioningIsOnSameSubkey(FieldList subkey1, FieldList subkey2) {
	if (subkey1 == null && subkey2 == null) {
		return true;
	}
	if (subkey1 == null || subkey2 == null || subkey1.size() != subkey2.size()) {
		return false;
	}

	for (int i = 0; i < subkey1.size(); i++) {
		// Field indexes are read into primitive ints so that the comparisons below are by
		// value; comparing the results of get(..) directly with == / != would be a reference
		// comparison if get(..) returns a boxed Integer.
		final int field1 = subkey1.get(i);
		boolean found = false;
		for (int j = 0; j < this.keySet1.size(); j++) {
			final int keyField1 = this.keySet1.get(j);
			if (field1 == keyField1) {
				// the matching field of input 2 must be at the same key position
				final int field2 = subkey2.get(i);
				final int keyField2 = this.keySet2.get(j);
				if (field2 != keyField2) {
					return false;
				}
				found = true;
				break;
			}
		}
		if (!found) {
			throw new RuntimeException("Partitioned field is no subset of the key");
		}
	}
	return true;
}
/**
 * Determines how the fields of an existing partitioning are ordered relative to the
 * specified key order.
 * <p>
 * TODO move to PartitionProperties (and change them from enum to class)
 *
 * @param specifiedOrder the key fields in the order in which they were specified.
 * @param actualOrder the fields in the order in which they are actually used.
 * @return null if both orders are equal; otherwise an array with one entry per field of
 *         {@code actualOrder}, where entry {@code i} is the position in
 *         {@code specifiedOrder} of the field {@code actualOrder.get(i)}.
 * @throws RuntimeException if a field of {@code actualOrder} is not contained in {@code specifiedOrder}.
 */
private int[] getScrambledKeyOrder(FieldList specifiedOrder, FieldList actualOrder) {
	if (specifiedOrder.equals(actualOrder)) {
		return null;
	}

	final int[] keyScrambleOrder = new int[actualOrder.size()];
	for (int actPos = 0; actPos < actualOrder.size(); actPos++) {
		// Read into a primitive int so that the comparison below is by value; comparing the
		// results of get(..) directly with == would be a reference comparison if get(..)
		// returns a boxed Integer.
		final int actualField = actualOrder.get(actPos);
		boolean foundKey = false;
		for (int specPos = 0; specPos < specifiedOrder.size(); specPos++) {
			final int specifiedField = specifiedOrder.get(specPos);
			if (actualField == specifiedField) {
				keyScrambleOrder[actPos] = specPos;
				foundKey = true;
				break;
			}
		}
		if (!foundKey) {
			throw new RuntimeException("Partitioned fields are not subset of the key");
		}
	}
	return keyScrambleOrder;
}
/**
 * Checks whether a unique field set of the given input stays unique after this Match.
 * <p>
 * The answer is true if the other input has a unique field set that is completely
 * contained in the key columns of that other input.
 * The {@code uniqueSet} argument itself is not inspected by this implementation.
 *
 * @param uniqueSet the unique field set in question (unused here).
 * @param input the input the unique set belongs to: 0 for the first, 1 for the second input.
 * @return true if the key of the other input covers one of its unique field sets, false otherwise.
 * @throws RuntimeException if {@code input} is neither 0 nor 1.
 */
public boolean keepsUniqueProperty(FieldSet uniqueSet, int input) {
	final FieldSet otherInputKeys;
	if (input == 0) {
		otherInputKeys = new FieldSet(keySet2);
	} else if (input == 1) {
		otherInputKeys = new FieldSet(keySet1);
	} else {
		throw new RuntimeException("Input num out of bounds");
	}

	final Set<FieldSet> otherInputUniqueSets = getUniqueFieldsForInput(1-input);
	for (FieldSet candidate : otherInputUniqueSets) {
		if (otherInputKeys.containsAll(candidate)) {
			// the key of the other input is unique
			return true;
		}
	}
	return false;
}
}