/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.planner;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSet.Builder;
import com.google.common.collect.Lists;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.volcano.AbstractConverter.ExpandConversionRule;
import org.apache.calcite.rel.core.RelFactories;
import org.apache.calcite.rel.rules.AggregateExpandDistinctAggregatesRule;
import org.apache.calcite.rel.rules.AggregateRemoveRule;
import org.apache.calcite.rel.rules.FilterMergeRule;
import org.apache.calcite.rel.rules.JoinPushExpressionsRule;
import org.apache.calcite.rel.rules.JoinPushThroughJoinRule;
import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
import org.apache.calcite.rel.rules.LoptOptimizeJoinRule;
import org.apache.calcite.rel.rules.ProjectRemoveRule;
import org.apache.calcite.rel.rules.ProjectToWindowRule;
import org.apache.calcite.rel.rules.ProjectWindowTransposeRule;
import org.apache.calcite.rel.rules.ReduceExpressionsRule;
import org.apache.calcite.rel.rules.SortRemoveRule;
import org.apache.calcite.rel.rules.UnionToDistinctRule;
import org.apache.calcite.tools.RuleSet;
import org.apache.calcite.tools.RuleSets;
import org.apache.drill.exec.ops.OptimizerRulesContext;
import org.apache.drill.exec.planner.logical.DrillAggregateRule;
import org.apache.drill.exec.planner.logical.DrillFilterAggregateTransposeRule;
import org.apache.drill.exec.planner.logical.DrillFilterJoinRules;
import org.apache.drill.exec.planner.logical.DrillFilterRule;
import org.apache.drill.exec.planner.logical.DrillJoinRel;
import org.apache.drill.exec.planner.logical.DrillJoinRule;
import org.apache.drill.exec.planner.logical.DrillLimitRule;
import org.apache.drill.exec.planner.logical.DrillMergeProjectRule;
import org.apache.drill.exec.planner.logical.DrillProjectRule;
import org.apache.drill.exec.planner.logical.DrillPushFilterPastProjectRule;
import org.apache.drill.exec.planner.logical.DrillPushLimitToScanRule;
import org.apache.drill.exec.planner.logical.DrillPushProjIntoScan;
import org.apache.drill.exec.planner.logical.DrillPushProjectPastFilterRule;
import org.apache.drill.exec.planner.logical.DrillPushProjectPastJoinRule;
import org.apache.drill.exec.planner.logical.DrillReduceAggregatesRule;
import org.apache.drill.exec.planner.logical.DrillReduceExpressionsRule;
import org.apache.drill.exec.planner.logical.DrillRelFactories;
import org.apache.drill.exec.planner.logical.DrillScanRule;
import org.apache.drill.exec.planner.logical.DrillSortRule;
import org.apache.drill.exec.planner.logical.DrillUnionAllRule;
import org.apache.drill.exec.planner.logical.DrillValuesRule;
import org.apache.drill.exec.planner.logical.DrillWindowRule;
import org.apache.drill.exec.planner.logical.partition.ParquetPruneScanRule;
import org.apache.drill.exec.planner.logical.partition.PruneScanRule;
import org.apache.drill.exec.planner.physical.ConvertCountToDirectScan;
import org.apache.drill.exec.planner.physical.DirectScanPrule;
import org.apache.drill.exec.planner.physical.FilterPrule;
import org.apache.drill.exec.planner.physical.HashAggPrule;
import org.apache.drill.exec.planner.physical.HashJoinPrule;
import org.apache.drill.exec.planner.physical.LimitPrule;
import org.apache.drill.exec.planner.physical.LimitUnionExchangeTransposeRule;
import org.apache.drill.exec.planner.physical.MergeJoinPrule;
import org.apache.drill.exec.planner.physical.NestedLoopJoinPrule;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.planner.physical.ProjectPrule;
import org.apache.drill.exec.planner.physical.PushLimitToTopN;
import org.apache.drill.exec.planner.physical.ScanPrule;
import org.apache.drill.exec.planner.physical.ScreenPrule;
import org.apache.drill.exec.planner.physical.SortConvertPrule;
import org.apache.drill.exec.planner.physical.SortPrule;
import org.apache.drill.exec.planner.physical.StreamAggPrule;
import org.apache.drill.exec.planner.physical.UnionAllPrule;
import org.apache.drill.exec.planner.physical.ValuesPrule;
import org.apache.drill.exec.planner.physical.WindowPrule;
import org.apache.drill.exec.planner.physical.WriterPrule;
import org.apache.drill.exec.store.AbstractStoragePlugin;
import org.apache.drill.exec.store.StoragePlugin;
import org.apache.drill.exec.store.parquet.ParquetPushDownFilter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
public enum PlannerPhase {
//private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DrillRuleSets.class);
LOGICAL_PRUNE_AND_JOIN("Logical Planning (with join and partition pruning)") {
  /**
   * Merges, in this order, the basic logical rules, the prune-scan rules, the join
   * permutation rules, the user-configurable logical rules and the rules contributed
   * by the storage plugins for this phase.
   *
   * @param context shared planning state handed to every rule factory
   * @param plugins storage plugins that are asked for additional rules
   * @return the merged rule set
   */
  @Override
  public RuleSet getRules(OptimizerRulesContext context, Collection<StoragePlugin> plugins) {
    return PlannerPhase.mergedRuleSets(
        getDrillBasicRules(context),
        getPruneScanRules(context),
        getJoinPermRules(context),
        getDrillUserConfigurableLogicalRules(context),
        getStorageRules(context, plugins, this));
  }
},
WINDOW_REWRITE("Window Function rewrites") {
  /**
   * Returns a fixed two-rule set: calc expression reduction followed by the
   * project-to-window rewrite. Neither argument is consulted.
   *
   * @param context shared planning state (unused by this phase)
   * @param plugins storage plugins (unused by this phase)
   * @return the window rewrite rule set
   */
  @Override
  public RuleSet getRules(OptimizerRulesContext context, Collection<StoragePlugin> plugins) {
    final RuleSet windowRewriteRules =
        RuleSets.ofList(ReduceExpressionsRule.CALC_INSTANCE, ProjectToWindowRule.PROJECT);
    return windowRewriteRules;
  }
},
LOGICAL_PRUNE("Logical Planning (with partition pruning)") {
  /**
   * Merges, in this order, the basic logical rules, the prune-scan rules, the
   * user-configurable logical rules and the storage plugin rules for this phase.
   *
   * @param context shared planning state handed to every rule factory
   * @param plugins storage plugins that are asked for additional rules
   * @return the merged rule set
   */
  @Override
  public RuleSet getRules(OptimizerRulesContext context, Collection<StoragePlugin> plugins) {
    final RuleSet basic = getDrillBasicRules(context);
    final RuleSet pruning = getPruneScanRules(context);
    final RuleSet configurable = getDrillUserConfigurableLogicalRules(context);
    final RuleSet storage = getStorageRules(context, plugins, this);
    return PlannerPhase.mergedRuleSets(basic, pruning, configurable, storage);
  }
},
JOIN_PLANNING("LOPT Join Planning") {
  /**
   * Builds the join planning rules: the multi-join and LOPT join rules are included only
   * when join optimization is enabled in the planner settings, project removal is always
   * included, and the storage plugin rules for this phase are merged in last.
   *
   * @param context shared planning state; supplies the planner settings
   * @param plugins storage plugins that are asked for additional rules
   * @return the merged rule set
   */
  @Override
  public RuleSet getRules(OptimizerRulesContext context, Collection<StoragePlugin> plugins) {
    final List<RelOptRule> joinRules = Lists.newArrayList();
    final boolean optimizeJoins = context.getPlannerSettings().isJoinOptimizationEnabled();
    if (optimizeJoins) {
      joinRules.add(DRILL_JOIN_TO_MULTIJOIN_RULE);
      joinRules.add(DRILL_LOPT_OPTIMIZE_JOIN_RULE);
    }
    joinRules.add(ProjectRemoveRule.INSTANCE);

    final RuleSet planningRules = RuleSets.ofList(joinRules);
    final RuleSet storageRules = getStorageRules(context, plugins, this);
    return PlannerPhase.mergedRuleSets(planningRules, storageRules);
  }
},
SUM_CONVERSION("Convert SUM to $SUM0") {
  /**
   * Merges the two SUM-reducing aggregate rules (plain and window) with the storage
   * plugin rules for this phase.
   *
   * @param context shared planning state handed to the storage plugins
   * @param plugins storage plugins that are asked for additional rules
   * @return the merged rule set
   */
  @Override
  public RuleSet getRules(OptimizerRulesContext context, Collection<StoragePlugin> plugins) {
    final RuleSet sumRules = RuleSets.ofList(
        DrillReduceAggregatesRule.INSTANCE_SUM,
        DrillReduceAggregatesRule.INSTANCE_WINDOW_SUM);
    final RuleSet storageRules = getStorageRules(context, plugins, this);
    return PlannerPhase.mergedRuleSets(sumRules, storageRules);
  }
},
PARTITION_PRUNING("Partition Prune Planning") {
  /**
   * Merges the prune-scan rules with the storage plugin rules for this phase.
   *
   * @param context shared planning state handed to every rule factory
   * @param plugins storage plugins that are asked for additional rules
   * @return the merged rule set
   */
  @Override
  public RuleSet getRules(OptimizerRulesContext context, Collection<StoragePlugin> plugins) {
    final RuleSet pruning = getPruneScanRules(context);
    final RuleSet storage = getStorageRules(context, plugins, this);
    return PlannerPhase.mergedRuleSets(pruning, storage);
  }
},
PHYSICAL_PARTITION_PRUNING("Physical Partition Prune Planning") {
  /**
   * Merges the physical prune-scan rules with the storage plugin rules for this phase.
   *
   * @param context shared planning state handed to every rule factory
   * @param plugins storage plugins that are asked for additional rules
   * @return the merged rule set
   */
  @Override
  public RuleSet getRules(OptimizerRulesContext context, Collection<StoragePlugin> plugins) {
    final RuleSet physicalPruning = getPhysicalPruneScanRules(context);
    final RuleSet storage = getStorageRules(context, plugins, this);
    return PlannerPhase.mergedRuleSets(physicalPruning, storage);
  }
},
DIRECTORY_PRUNING("Directory Prune Planning") {
  /**
   * Merges the directory prune-scan rules with the storage plugin rules for this phase.
   *
   * @param context shared planning state handed to every rule factory
   * @param plugins storage plugins that are asked for additional rules
   * @return the merged rule set
   */
  @Override
  public RuleSet getRules(OptimizerRulesContext context, Collection<StoragePlugin> plugins) {
    final RuleSet directoryPruning = getDirPruneScanRules(context);
    final RuleSet storage = getStorageRules(context, plugins, this);
    return PlannerPhase.mergedRuleSets(directoryPruning, storage);
  }
},
LOGICAL("Logical Planning (no pruning or join).") {
  /**
   * Merges, in this order, the basic logical rules, the user-configurable logical rules
   * and the storage plugin rules for this phase.
   *
   * @param context shared planning state handed to every rule factory
   * @param plugins storage plugins that are asked for additional rules
   * @return the merged rule set
   */
  @Override
  public RuleSet getRules(OptimizerRulesContext context, Collection<StoragePlugin> plugins) {
    final RuleSet basic = PlannerPhase.getDrillBasicRules(context);
    final RuleSet configurable = PlannerPhase.getDrillUserConfigurableLogicalRules(context);
    final RuleSet storage = getStorageRules(context, plugins, this);
    return PlannerPhase.mergedRuleSets(basic, configurable, storage);
  }
},
PHYSICAL("Physical Planning") {
  /**
   * Merges the physical planning rules with the storage plugin rules for this phase.
   *
   * @param context shared planning state handed to every rule factory
   * @param plugins storage plugins that are asked for additional rules
   * @return the merged rule set
   */
  @Override
  public RuleSet getRules(OptimizerRulesContext context, Collection<StoragePlugin> plugins) {
    final RuleSet physical = PlannerPhase.getPhysicalRules(context);
    final RuleSet storage = getStorageRules(context, plugins, this);
    return PlannerPhase.mergedRuleSets(physical, storage);
  }
};
/** Human-readable description of this phase, as passed to the constant's constructor. */
public final String description;
/**
 * Creates a phase with the given description.
 *
 * @param description text stored in {@link #description}
 */
PlannerPhase(String description) {
this.description = description;
}
/**
 * Returns the planner rules that apply to this phase. Every constant supplies its own
 * implementation.
 *
 * @param context shared planning state handed to the rule factories
 * @param plugins storage plugins that may contribute additional rules
 * @return the rule set for this phase
 */
public abstract RuleSet getRules(OptimizerRulesContext context, Collection<StoragePlugin> plugins);
/**
 * Collects the optimizer rules contributed by the given storage plugins.
 *
 * Plugins that extend {@link AbstractStoragePlugin} are asked through the overload that
 * also receives the phase; every other plugin is asked through the context-only overload.
 *
 * @param context shared planning state handed to each plugin
 * @param plugins storage plugins to query
 * @param phase   phase passed on to {@link AbstractStoragePlugin} instances
 * @return the rules of all plugins, gathered into one rule set
 */
private static RuleSet getStorageRules(OptimizerRulesContext context, Collection<StoragePlugin> plugins,
    PlannerPhase phase) {
  final Builder<RelOptRule> pluginRules = ImmutableSet.builder();
  for (final StoragePlugin storagePlugin : plugins) {
    if (!(storagePlugin instanceof AbstractStoragePlugin)) {
      pluginRules.addAll(storagePlugin.getOptimizerRules(context));
      continue;
    }
    final AbstractStoragePlugin phaseAwarePlugin = (AbstractStoragePlugin) storagePlugin;
    pluginRules.addAll(phaseAwarePlugin.getOptimizerRules(context, phase));
  }
  return RuleSets.ofList(pluginRules.build());
}
/**
 * {@link JoinToMultiJoinRule} instance constructed for {@link DrillJoinRel}. The
 * JOIN_PLANNING phase adds it when join optimization is enabled.
 */
static final RelOptRule DRILL_JOIN_TO_MULTIJOIN_RULE = new JoinToMultiJoinRule(DrillJoinRel.class);
/**
 * {@link LoptOptimizeJoinRule} instance constructed with Drill's logical join, project and
 * filter factories. The JOIN_PLANNING phase adds it when join optimization is enabled.
 */
static final RelOptRule DRILL_LOPT_OPTIMIZE_JOIN_RULE = new LoptOptimizeJoinRule(
DrillRelFactories.DRILL_LOGICAL_JOIN_FACTORY,
DrillRelFactories.DRILL_LOGICAL_PROJECT_FACTORY,
DrillRelFactories.DRILL_LOGICAL_FILTER_FACTORY);
/**
 * Builds the set of logical rules that session/system options can switch on or off.
 *
 * Rules that must always be part of logical planning do not belong here; add those to
 * the set produced by getDrillBasicRules() instead.
 *
 * @param optimizerRulesContext source of the planner settings that decide which
 *                              rules are included
 * @return the rules whose controlling option is currently enabled; empty when
 *         constant folding is disabled
 */
static RuleSet getDrillUserConfigurableLogicalRules(OptimizerRulesContext optimizerRulesContext) {
  final PlannerSettings settings = optimizerRulesContext.getPlannerSettings();
  // Holds only the rules whose user-facing planning option is turned on.
  final Builder<RelOptRule> enabledRules = ImmutableSet.builder();

  if (settings.isConstantFoldingEnabled()) {
    // TODO - DRILL-2218
    enabledRules.add(
        ReduceExpressionsRule.PROJECT_INSTANCE,
        DrillReduceExpressionsRule.FILTER_INSTANCE_DRILL,
        DrillReduceExpressionsRule.CALC_INSTANCE_DRILL);
  }

  return RuleSets.ofList(enabledRules.build());
}
/*
 * Context-free logical rules. None of them needs per-query state, so the shared singleton
 * instances are collected once here; getDrillBasicRules() below combines this set with the
 * per-query rules. The rules are listed in the order in which they are added to the set.
 */
static final ImmutableSet<RelOptRule> staticRuleSet = ImmutableSet.<RelOptRule>builder()
    // Add support for Distinct Union (by using Union-All followed by Distinct)
    .add(UnionToDistinctRule.INSTANCE)

    // Support for WHERE style joins.
    .add(
        DrillFilterJoinRules.DRILL_FILTER_ON_JOIN,
        DrillFilterJoinRules.DRILL_JOIN,
        JoinPushExpressionsRule.INSTANCE)

    // Filter push-down related rules.
    .add(DrillPushFilterPastProjectRule.INSTANCE)
    // Due to infinite loop in planning (DRILL-3257), temporarily disable this rule
    //.add(FilterSetOpTransposeRule.INSTANCE)
    .add(
        DrillFilterAggregateTransposeRule.INSTANCE,
        FilterMergeRule.INSTANCE,
        AggregateRemoveRule.INSTANCE,
        ProjectRemoveRule.INSTANCE,
        SortRemoveRule.INSTANCE,
        AggregateExpandDistinctAggregatesRule.JOIN,
        DrillReduceAggregatesRule.INSTANCE)

    // Projection push-down related rules.
    .add(
        DrillPushProjectPastFilterRule.INSTANCE,
        DrillPushProjectPastJoinRule.INSTANCE)
    // Due to infinite loop in planning (DRILL-3257), temporarily disable this rule
    //.add(DrillProjectSetOpTransposeRule.INSTANCE)
    .add(
        ProjectWindowTransposeRule.INSTANCE,
        DrillPushProjIntoScan.INSTANCE)

    // Convert from Calcite Logical to Drill Logical Rules.
    .add(
        ExpandConversionRule.INSTANCE,
        DrillScanRule.INSTANCE,
        DrillFilterRule.INSTANCE,
        DrillProjectRule.INSTANCE,
        DrillWindowRule.INSTANCE,
        DrillAggregateRule.INSTANCE,
        DrillLimitRule.INSTANCE,
        DrillSortRule.INSTANCE,
        DrillJoinRule.INSTANCE,
        DrillUnionAllRule.INSTANCE,
        DrillValuesRule.INSTANCE)
    .build();
/**
 * Builds the logical rules that are always applied during logical planning.
 *
 * The result cannot be cached as a static singleton: besides the shared
 * {@code staticRuleSet} it contains a {@link DrillMergeProjectRule} created from the
 * function registry of the supplied context, so a fresh set is assembled on every call.
 *
 * Rules that should be switchable through a system/session option belong in
 * getDrillUserConfigurableLogicalRules(), not here.
 *
 * Note: the join permutation rules are not part of this set.
 *
 * @param optimizerRulesContext shared planning state; its function registry is handed
 *                              to the merge-project rule
 * @return the always-on logical rules
 */
static RuleSet getDrillBasicRules(OptimizerRulesContext optimizerRulesContext) {
  // Context dependent rule; must be created per call and never reused across queries.
  final RelOptRule mergeProjectRule = DrillMergeProjectRule.getInstance(
      true,
      RelFactories.DEFAULT_PROJECT_FACTORY,
      optimizerRulesContext.getFunctionRegistry());

  final Builder<RelOptRule> alwaysOnRules = ImmutableSet.builder();
  alwaysOnRules.addAll(staticRuleSet);
  alwaysOnRules.add(mergeProjectRule);
  return RuleSets.ofList(alwaysOnRules.build());
}
/**
 * Builds the partition pruning rules used in logical planning: the directory filter
 * rules, the Parquet filter rules and the two limit-to-scan push-down rules.
 *
 * @param optimizerRulesContext shared planning state handed to the context dependent
 *                              pruning rule factories
 * @return the pruning rules, in the order they are added below
 */
static RuleSet getPruneScanRules(OptimizerRulesContext optimizerRulesContext) {
  final Builder<RelOptRule> pruning = ImmutableSet.builder();
  pruning.add(PruneScanRule.getDirFilterOnProject(optimizerRulesContext));
  pruning.add(PruneScanRule.getDirFilterOnScan(optimizerRulesContext));
  pruning.add(ParquetPruneScanRule.getFilterOnProjectParquet(optimizerRulesContext));
  pruning.add(ParquetPruneScanRule.getFilterOnScanParquet(optimizerRulesContext));
  pruning.add(DrillPushLimitToScanRule.LIMIT_ON_SCAN);
  pruning.add(DrillPushLimitToScanRule.LIMIT_ON_PROJECT);
  return RuleSets.ofList(pruning.build());
}
/**
 * Builds the pruning rules that are applied after physical planning: the two Parquet
 * filter push-down rules.
 *
 * @param optimizerRulesContext shared planning state handed to the rule factories
 * @return the physical pruning rules
 */
static RuleSet getPhysicalPruneScanRules(OptimizerRulesContext optimizerRulesContext) {
  // See DRILL-4998 for more detail.
  // Main reason for doing this is we want to reduce the performance regression possibility
  // caused by a different join order, as a result of reduced row count in scan operator.
  // Ideally this should be done in logical planning, before join order planning is done.
  // Before we can make such change, we have to figure out how to adjust the selectivity
  // estimation of filter operator, after filter is pushed down to scan.
  final Builder<RelOptRule> physicalPruning = ImmutableSet.builder();
  physicalPruning.add(ParquetPushDownFilter.getFilterOnProject(optimizerRulesContext));
  physicalPruning.add(ParquetPushDownFilter.getFilterOnScan(optimizerRulesContext));
  return RuleSets.ofList(physicalPruning.build());
}
/**
 * Builds the directory-based partition pruning rules used in Calcite logical planning.
 *
 * @param optimizerRulesContext shared planning state handed to the rule factories
 * @return a rule set holding the directory filter-on-project and filter-on-scan rules
 */
static RuleSet getDirPruneScanRules(OptimizerRulesContext optimizerRulesContext) {
  final Builder<RelOptRule> directoryPruning = ImmutableSet.builder();
  directoryPruning.add(PruneScanRule.getDirFilterOnProject(optimizerRulesContext));
  directoryPruning.add(PruneScanRule.getDirFilterOnScan(optimizerRulesContext));
  return RuleSets.ofList(directoryPruning.build());
}
/**
 * Builds the rule set for join permutation, used only in VolcanoPlanner.
 *
 * @param optimizerRulesContext shared planning state (not consulted by this method)
 * @return the right and left join-push-through-join rules, in that order
 */
static RuleSet getJoinPermRules(OptimizerRulesContext optimizerRulesContext) {
  final Builder<RelOptRule> joinPermutation = ImmutableSet.builder();
  joinPermutation.add(JoinPushThroughJoinRule.RIGHT);
  joinPermutation.add(JoinPushThroughJoinRule.LEFT);
  return RuleSets.ofList(joinPermutation.build());
}
/** Rule set whose only member is {@link ProjectPrule#INSTANCE}. */
static final RuleSet DRILL_PHYSICAL_DISK = RuleSets.ofList(ProjectPrule.INSTANCE);
/**
 * Builds the physical planning rules.
 *
 * A fixed group of rules is always included. The aggregation and join implementation
 * rules are added only when the matching planner setting is enabled, and the broadcast
 * variants additionally require broadcast joins to be enabled.
 *
 * @param optimizerRulesContext source of the planner settings consulted below
 * @return the physical rules, in the order they are added below
 */
static final RuleSet getPhysicalRules(OptimizerRulesContext optimizerRulesContext) {
  final Builder<RelOptRule> physicalRules = ImmutableSet.builder();
  final PlannerSettings settings = optimizerRulesContext.getPlannerSettings();

  // Rules that are part of every physical plan.
  physicalRules.add(
      ConvertCountToDirectScan.AGG_ON_PROJ_ON_SCAN,
      ConvertCountToDirectScan.AGG_ON_SCAN,
      SortConvertPrule.INSTANCE,
      SortPrule.INSTANCE,
      ProjectPrule.INSTANCE,
      ScanPrule.INSTANCE,
      ScreenPrule.INSTANCE,
      ExpandConversionRule.INSTANCE,
      FilterPrule.INSTANCE,
      LimitPrule.INSTANCE,
      WriterPrule.INSTANCE,
      WindowPrule.INSTANCE,
      PushLimitToTopN.INSTANCE,
      LimitUnionExchangeTransposeRule.INSTANCE,
      UnionAllPrule.INSTANCE,
      ValuesPrule.INSTANCE,
      DirectScanPrule.INSTANCE);

  // Aggregation implementations.
  if (settings.isHashAggEnabled()) {
    physicalRules.add(HashAggPrule.INSTANCE);
  }
  if (settings.isStreamAggEnabled()) {
    physicalRules.add(StreamAggPrule.INSTANCE);
  }

  // Join implementations; each broadcast variant also needs broadcast joins enabled.
  if (settings.isHashJoinEnabled()) {
    physicalRules.add(HashJoinPrule.DIST_INSTANCE);
    if (settings.isBroadcastJoinEnabled()) {
      physicalRules.add(HashJoinPrule.BROADCAST_INSTANCE);
    }
  }
  if (settings.isMergeJoinEnabled()) {
    physicalRules.add(MergeJoinPrule.DIST_INSTANCE);
    if (settings.isBroadcastJoinEnabled()) {
      physicalRules.add(MergeJoinPrule.BROADCAST_INSTANCE);
    }
  }

  // NLJ plans consist of broadcasting the right child, hence we need
  // broadcast join enabled.
  if (settings.isNestedLoopJoinEnabled() && settings.isBroadcastJoinEnabled()) {
    physicalRules.add(NestedLoopJoinPrule.INSTANCE);
  }

  return RuleSets.ofList(physicalRules.build());
}
/**
 * Wraps the given rules in a {@link RuleSet}.
 *
 * @param rules rules to expose as a rule set
 * @return a rule set built from {@code rules}
 */
static RuleSet create(ImmutableSet<RelOptRule> rules) {
  final RuleSet wrapped = RuleSets.ofList(rules);
  return wrapped;
}
/**
 * Combines several rule sets into one. The rules are gathered into a single
 * {@link ImmutableSet}, visiting the given rule sets from first to last.
 *
 * @param ruleSets rule sets to combine
 * @return one rule set holding the rules of all inputs
 */
static RuleSet mergedRuleSets(RuleSet... ruleSets) {
  final Builder<RelOptRule> merged = ImmutableSet.builder();
  for (final RuleSet current : ruleSets) {
    merged.addAll(current);
  }
  return RuleSets.ofList(merged.build());
}
}