/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.impl.logicalLayer.optimizer;

import java.util.List;
import java.util.Set;

import org.apache.pig.ExecType;
import org.apache.pig.PigException;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.LOFilter;
import org.apache.pig.impl.logicalLayer.LOForEach;
import org.apache.pig.impl.logicalLayer.LOLimit;
import org.apache.pig.impl.logicalLayer.LOLoad;
import org.apache.pig.impl.logicalLayer.LONative;
import org.apache.pig.impl.logicalLayer.LOStream;
import org.apache.pig.impl.logicalLayer.LogicalOperator;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.logicalLayer.RelationalOperator;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.optimizer.OptimizerException;
import org.apache.pig.impl.plan.optimizer.PlanOptimizer;
import org.apache.pig.impl.plan.optimizer.Rule;
import org.apache.pig.impl.plan.optimizer.RuleMatcher;
import org.apache.pig.impl.plan.optimizer.RuleOperator;
import org.apache.pig.impl.plan.optimizer.RulePlan;

/**
 * An optimizer for logical plans.
 *
 * <p>The constructors register a fixed sequence of rules. Four of them
 * (implicit split insertion, partition filter, and the two type cast
 * inserters) are always registered; the remaining ones can be switched off
 * by name, or all together with the name {@code "all"} (case-insensitive),
 * through the {@code turnOffRules} constructor argument. The column pruning
 * rule is kept apart from the other rules and is applied once by
 * {@link #optimize()} after the main loop has finished.
 */
public class LogicalOptimizer extends
        PlanOptimizer<LogicalOperator, LogicalPlan> {

    /** Scope used for the operator keys of all rule operators. */
    private static final String SCOPE = "RULE";

    private static NodeIdGenerator nodeIdGen = NodeIdGenerator.getGenerator();

    /** Names of the rules that must not run; {@code null} means none. */
    private Set<String> mRulesOff = null;

    /**
     * The column pruning rule. It is never put into {@code mRules}; it stays
     * {@code null} when all optional rules are turned off.
     */
    private Rule<LogicalOperator, LogicalPlan> pruneRule;

    /**
     * Builds an optimizer for {@code plan} using {@link ExecType#MAPREDUCE}.
     *
     * @param plan the logical plan to optimize
     */
    public LogicalOptimizer(LogicalPlan plan) {
        this(plan, ExecType.MAPREDUCE);
    }

    /**
     * Builds an optimizer for {@code plan} with every rule enabled.
     *
     * @param plan the logical plan to optimize
     * @param mode the execution type handed to the limit optimizer
     */
    public LogicalOptimizer(LogicalPlan plan, ExecType mode) {
        super(plan);
        runOptimizations(plan, mode);
    }

    /**
     * Builds an optimizer for {@code plan} with some rules disabled.
     *
     * @param plan the logical plan to optimize
     * @param mode the execution type handed to the limit optimizer
     * @param turnOffRules names of rules to disable (compared ignoring
     *            case); the name {@code "all"} disables every optional rule;
     *            may be {@code null}
     */
    public LogicalOptimizer(LogicalPlan plan, ExecType mode,
            Set<String> turnOffRules) {
        super(plan);
        mRulesOff = turnOffRules;
        runOptimizations(plan, mode);
    }

    /**
     * Registers the rules of the logical optimizer. Despite its name this
     * method does not transform the plan; it only fills the rule list and
     * sets {@link #pruneRule}.
     *
     * @param plan the plan the transformers are created for
     * @param mode the execution type handed to the limit optimizer
     */
    private void runOptimizations(LogicalPlan plan, ExecType mode) {
        RulePlan rulePlan;

        // List of rules for the logical optimizer

        boolean turnAllRulesOff = allRulesTurnedOff();

        // This one has to be before the type cast inserter as it expects the
        // load to only have one output.
        // Find any places in the plan that have an implicit split and make
        // it explicit. Since the RuleMatcher doesn't handle trees properly,
        // we cheat and say that we match any node. Then we'll do the actual
        // test in the transformers check method.
        rulePlan = new RulePlan();
        RuleOperator anyLogicalOperator = new RuleOperator(
                LogicalOperator.class, RuleOperator.NodeType.ANY_NODE,
                nextOperatorKey());
        rulePlan.add(anyLogicalOperator);
        mRules.add(new Rule<LogicalOperator, LogicalPlan>(rulePlan,
                new ImplicitSplitInserter(plan), "ImplicitSplitInserter"));

        // this one is ordered to be before other optimizations since later
        // optimizations may move the LOFilter that it looks for just after a
        // LOLoad
        rulePlan = new RulePlan();
        RuleOperator loLoad = new RuleOperator(LOLoad.class,
                nextOperatorKey());
        rulePlan.add(loLoad);
        mRules.add(new Rule<LogicalOperator, LogicalPlan>(rulePlan,
                new PartitionFilterOptimizer(plan),
                "LoadPartitionFilterOptimizer"));

        // Add type casting to plans where the schema has been declared (by
        // user, data, or data catalog).
        rulePlan = new RulePlan();
        loLoad = new RuleOperator(LOLoad.class, nextOperatorKey());
        rulePlan.add(loLoad);
        mRules.add(new Rule<LogicalOperator, LogicalPlan>(rulePlan,
                new TypeCastInserter(plan, LOLoad.class.getName()),
                "LoadTypeCastInserter"));

        // Add type casting to plans where the schema has been declared by
        // user in a statement with stream operator.
        rulePlan = new RulePlan();
        RuleOperator loStream = new RuleOperator(LOStream.class,
                nextOperatorKey());
        rulePlan.add(loStream);
        mRules.add(new Rule<LogicalOperator, LogicalPlan>(rulePlan,
                new TypeCastInserter(plan, LOStream.class.getName()),
                "StreamTypeCastInserter"));

        if (!turnAllRulesOff) {
            // Push up limit wherever possible.
            rulePlan = new RulePlan();
            RuleOperator loLimit = new RuleOperator(LOLimit.class,
                    nextOperatorKey());
            rulePlan.add(loLimit);
            Rule<LogicalOperator, LogicalPlan> rule =
                    new Rule<LogicalOperator, LogicalPlan>(rulePlan,
                            new OpLimitOptimizer(plan, mode),
                            "LimitOptimizer");
            checkAndAddRule(rule);

            // Push filters up wherever possible
            rulePlan = new RulePlan();
            RuleOperator loFilter = new RuleOperator(LOFilter.class,
                    nextOperatorKey());
            rulePlan.add(loFilter);
            rule = new Rule<LogicalOperator, LogicalPlan>(rulePlan,
                    new PushUpFilter(plan), "PushUpFilter");
            checkAndAddRule(rule);

            // Push foreach with flatten down wherever possible
            rulePlan = new RulePlan();
            RuleOperator loForeach = new RuleOperator(LOForEach.class,
                    nextOperatorKey());
            rulePlan.add(loForeach);
            rule = new Rule<LogicalOperator, LogicalPlan>(rulePlan,
                    new PushDownForeachFlatten(plan),
                    "PushDownForeachFlatten");
            checkAndAddRule(rule);

            // Prune column up wherever possible. This rule is deliberately
            // not added to mRules: optimize() applies it on its own after
            // the other rules are done.
            rulePlan = new RulePlan();
            RuleOperator rulePruneColumnsOperator = new RuleOperator(
                    RelationalOperator.class,
                    RuleOperator.NodeType.ANY_NODE, nextOperatorKey());
            rulePlan.add(rulePruneColumnsOperator);
            pruneRule = new Rule<LogicalOperator, LogicalPlan>(rulePlan,
                    new PruneColumns(plan), "PruneColumns",
                    Rule.WalkerAlgo.ReverseDependencyOrderWalker);
        }
    }

    /**
     * Creates a fresh operator key in {@link #SCOPE} for a rule operator.
     *
     * @return a new key built from the next node id of the generator
     */
    private static OperatorKey nextOperatorKey() {
        return new OperatorKey(SCOPE, nodeIdGen.getNextNodeId(SCOPE));
    }

    /**
     * Tells whether the set of disabled rules contains the name
     * {@code "all"}, compared ignoring case.
     *
     * @return {@code true} if every optional rule has been turned off
     */
    private boolean allRulesTurnedOff() {
        if (mRulesOff == null) {
            return false;
        }
        for (String rule : mRulesOff) {
            if ("all".equalsIgnoreCase(rule)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether a rule may run, i.e. its name is not listed (ignoring
     * case) in the set of disabled rules. This is a pure query: it must not
     * modify the rule list, because {@link #optimize()} also uses it for the
     * prune rule, which must stay out of the rule list.
     *
     * @param rule the rule to look up; may be {@code null}
     * @return {@code false} if {@code rule} is {@code null} or its name is
     *         disabled; {@code true} otherwise (a rule without a name cannot
     *         be disabled)
     */
    private boolean ruleEnabled(Rule<LogicalOperator, LogicalPlan> rule) {
        if (rule == null) {
            return false;
        }
        if (mRulesOff == null) {
            return true;
        }
        String ruleName = rule.getRuleName();
        if (ruleName == null) {
            return true;
        }
        for (String ruleOff : mRulesOff) {
            if (ruleName.equalsIgnoreCase(ruleOff)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Appends {@code rule} to the rule list, exactly once, unless it has
     * been turned off.
     *
     * @param rule the rule to register
     */
    private void checkAndAddRule(Rule<LogicalOperator, LogicalPlan> rule) {
        if (ruleEnabled(rule)) {
            mRules.add(rule);
        }
    }

    /**
     * Applies the registered rules repeatedly until an iteration sees no
     * approved match or the iteration limit {@code mMaxIterations} is
     * reached, then applies the column pruning rule once if it exists and is
     * enabled.
     *
     * @return the number of completed iterations of the main loop that were
     *         followed by another iteration check
     * @throws OptimizerException if rebuilding schemas or projection maps
     *             fails (error code 2145), or if a transformer fails
     */
    @Override
    public final int optimize() throws OptimizerException {
        // the code that follows is a copy of the code in the
        // base class. see the todo note in the base class
        boolean sawMatch = false;
        boolean initialized = false;
        int numIterations = 0;
        do {
            sawMatch = false;
            for (Rule<LogicalOperator, LogicalPlan> rule : mRules) {
                RuleMatcher<LogicalOperator, LogicalPlan> matcher =
                        new RuleMatcher<LogicalOperator, LogicalPlan>();
                if (matcher.match(rule)) {
                    // It matches the pattern. Now check if the transformer
                    // approves as well.
                    List<List<LogicalOperator>> matches =
                            matcher.getAllMatches();
                    for (List<LogicalOperator> match : matches) {
                        if (rule.getTransformer().check(match)) {
                            try {
                                // The transformer approves.
                                sawMatch = true;
                                // Schemas and projection maps are rebuilt
                                // once before the very first transformation
                                // and again after every transformation.
                                if (!initialized) {
                                    ((LogicalTransformer) rule.getTransformer()).rebuildSchemas();
                                    ((LogicalTransformer) rule.getTransformer()).rebuildProjectionMaps();
                                    initialized = true;
                                }
                                rule.getTransformer().transform(match);
                                ((LogicalTransformer) rule.getTransformer()).rebuildSchemas();
                                ((LogicalTransformer) rule.getTransformer()).rebuildProjectionMaps();
                            } catch (FrontendException fee) {
                                int errCode = 2145;
                                String msg = "Problem while rebuilding projection map or schema in logical optimizer.";
                                throw new OptimizerException(msg, errCode,
                                        PigException.BUG, fee);
                            }
                        }
                        // Reset after every match, approved or not.
                        rule.getTransformer().reset();
                    }
                }
            }
        } while (sawMatch && ++numIterations < mMaxIterations);

        // Column pruning runs a single time, outside the loop above.
        if (pruneRule != null && ruleEnabled(pruneRule)) {
            RuleMatcher<LogicalOperator, LogicalPlan> matcher =
                    new RuleMatcher<LogicalOperator, LogicalPlan>();
            if (matcher.match(pruneRule)) {
                List<List<LogicalOperator>> matches = matcher.getAllMatches();
                for (List<LogicalOperator> match : matches) {
                    if (pruneRule.getTransformer().check(match)) {
                        pruneRule.getTransformer().transform(match);
                    }
                }
                ((PruneColumns) pruneRule.getTransformer()).prune();
            }
        }
        return numIterations;
    }
}