/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.newplan.logical.optimizer; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import com.google.common.base.Preconditions; import com.google.common.collect.SetMultimap; import com.google.common.collect.TreeMultimap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pig.newplan.OperatorPlan; import org.apache.pig.newplan.logical.rules.AddForEach; import org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune; import org.apache.pig.newplan.logical.rules.DuplicateForEachColumnRewrite; import org.apache.pig.newplan.logical.rules.FilterAboveForeach; import org.apache.pig.newplan.logical.rules.GroupByConstParallelSetter; import org.apache.pig.newplan.logical.rules.ImplicitSplitInserter; import org.apache.pig.newplan.logical.rules.LimitOptimizer; import org.apache.pig.newplan.logical.rules.LoadTypeCastInserter; import org.apache.pig.newplan.logical.rules.LogicalExpressionSimplifier; import org.apache.pig.newplan.logical.rules.MergeFilter; import org.apache.pig.newplan.logical.rules.MergeForEach; import org.apache.pig.newplan.logical.rules.PartitionFilterOptimizer; import org.apache.pig.newplan.logical.rules.PushDownForEachFlatten; import org.apache.pig.newplan.logical.rules.PushUpFilter; import org.apache.pig.newplan.logical.rules.SplitFilter; import org.apache.pig.newplan.logical.rules.StreamTypeCastInserter; import org.apache.pig.newplan.optimizer.PlanOptimizer; import org.apache.pig.newplan.optimizer.Rule; public class LogicalPlanOptimizer extends PlanOptimizer { private static final Log LOG = LogFactory.getLog(LogicalPlanOptimizer.class); private static enum RulesReportKey { RULES_ENABLED, RULES_DISABLED } private Set<String> mRulesOff = null; private boolean allRulesDisabled = false; private SetMultimap<RulesReportKey, String> rulesReport = TreeMultimap.create(); /** * Create a new LogicalPlanOptimizer. * @param p Plan to optimize. * @param iterations Maximum number of optimizer iterations. * @param turnOffRules Optimization rules to disable. "all" disables all non-mandatory * rules. null enables all rules. */ public LogicalPlanOptimizer(OperatorPlan p, int iterations, Set<String> turnOffRules) { super(p, null, iterations); mRulesOff = turnOffRules == null ? new HashSet<String>() : turnOffRules; if (mRulesOff.contains("all")) { allRulesDisabled = true; } ruleSets = buildRuleSets(); LOG.info(rulesReport); addListeners(); } protected List<Set<Rule>> buildRuleSets() { List<Set<Rule>> ls = new ArrayList<Set<Rule>>(); // ImplicitSplitInserter set // This set of rules Insert Foreach dedicated for casting after load Set<Rule> s = new HashSet<Rule>(); Rule r = new ImplicitSplitInserter("ImplicitSplitInserter"); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); // DuplicateForEachColumnRewrite set // This insert Identity UDF in the case foreach duplicate field. // This is because we need unique uid through out the plan s = new HashSet<Rule>(); r = new DuplicateForEachColumnRewrite("DuplicateForEachColumnRewrite"); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); // Logical expression simplifier s = new HashSet<Rule>(); // add logical expression simplification rule r = new LogicalExpressionSimplifier("FilterLogicExpressionSimplifier"); checkAndAddRule(s, r); ls.add(s); // TypeCastInserter set // This set of rules Insert Foreach dedicated for casting after load s = new HashSet<Rule>(); // add split filter rule r = new LoadTypeCastInserter("LoadTypeCastInserter"); checkAndAddRule(s, r); r = new StreamTypeCastInserter("StreamTypeCastInserter"); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); // Limit Set // This set of rules push up limit s = new HashSet<Rule>(); // Optimize limit r = new LimitOptimizer("LimitOptimizer"); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); // Split Set // This set of rules does splitting of operators only. // It does not move operators s = new HashSet<Rule>(); // add split filter rule r = new SplitFilter("SplitFilter"); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); // Push Set, // This set does moving of operators only. s = new HashSet<Rule>(); r = new PushUpFilter("PushUpFilter"); checkAndAddRule(s, r); r = new FilterAboveForeach("PushUpFilter"); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); // Merge Set // This Set merges operators but does not move them. s = new HashSet<Rule>(); checkAndAddRule(s, r); // add merge filter rule r = new MergeFilter("MergeFilter"); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); // Partition filter set // This set of rules push partition filter to LoadFunc s = new HashSet<Rule>(); // Optimize partition filter r = new PartitionFilterOptimizer("PartitionFilterOptimizer"); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); // PushDownForEachFlatten set s = new HashSet<Rule>(); // Add the PushDownForEachFlatten r = new PushDownForEachFlatten("PushDownForEachFlatten"); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); // Prune Set // This set is used for pruning columns and maps s = new HashSet<Rule>(); // Add the PruneMap Filter r = new ColumnMapKeyPrune("ColumnMapKeyPrune"); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); // Add LOForEach set s = new HashSet<Rule>(); // Add the AddForEach r = new AddForEach("AddForEach"); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); // Add MergeForEach set s = new HashSet<Rule>(); // Add the AddForEach r = new MergeForEach("MergeForEach"); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); //set parallism to 1 for cogroup/group-by on constant s = new HashSet<Rule>(); r = new GroupByConstParallelSetter("GroupByConstParallelSetter"); checkAndAddRule(s, r); if(!s.isEmpty()) ls.add(s); return ls; } /** * Add rule to ruleSet if its mandatory, or has not been disabled. * @param ruleSet Set rule will be added to if not disabled. * @param rule Rule to potentially add. */ private void checkAndAddRule(Set<Rule> ruleSet, Rule rule) { Preconditions.checkArgument(ruleSet != null); Preconditions.checkArgument(rule != null && rule.getName() != null); if (rule.isMandatory()) { ruleSet.add(rule); rulesReport.put(RulesReportKey.RULES_ENABLED, rule.getName()); } else if (!allRulesDisabled && !mRulesOff.contains(rule.getName())) { ruleSet.add(rule); rulesReport.put(RulesReportKey.RULES_ENABLED, rule.getName()); } else { rulesReport.put(RulesReportKey.RULES_DISABLED, rule.getName()); } } private void addListeners() { addPlanTransformListener(new SchemaPatcher()); addPlanTransformListener(new ProjectionPatcher()); } }