/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer.physical;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.StatsTask;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.TaskGraphWalker;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MergeJoinWork;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * SerializeFilter is a simple physical optimizer that serializes all filter expressions in
 * TableScan operators.
 */
public class SerializeFilter implements PhysicalPlanResolver {

  protected static transient final Logger LOG = LoggerFactory.getLogger(SerializeFilter.class);

  public class Serializer implements Dispatcher {

    private final PhysicalContext pctx;

    public Serializer(PhysicalContext pctx) {
      this.pctx = pctx;
    }

    @SuppressWarnings("unchecked")
    @Override
    public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
        throws SemanticException {
      Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;

      // A StatsTask carries no Tez work of its own; evaluate the task that feeds it instead.
      if (currTask instanceof StatsTask) {
        currTask = ((StatsTask) currTask).getWork().getSourceTask();
      }

      if (currTask instanceof TezTask) {
        TezWork work = ((TezTask) currTask).getWork();
        for (BaseWork w : work.getAllWork()) {
          evaluateWork(w);
        }
      }
      return null;
    }

    private void evaluateWork(BaseWork w) throws SemanticException {
      if (w instanceof MapWork) {
        evaluateMapWork((MapWork) w);
      } else if (w instanceof ReduceWork) {
        evaluateReduceWork((ReduceWork) w);
      } else if (w instanceof MergeJoinWork) {
        evaluateMergeWork((MergeJoinWork) w);
      } else {
        LOG.info("We are not going to evaluate this work type: " + w.getClass().getCanonicalName());
      }
    }

    private void evaluateMergeWork(MergeJoinWork w) throws SemanticException {
      for (BaseWork baseWork : w.getBaseWorkList()) {
        evaluateOperators(baseWork, pctx);
      }
    }

    private void evaluateReduceWork(ReduceWork w) throws SemanticException {
      evaluateOperators(w, pctx);
    }

    private void evaluateMapWork(MapWork w) throws SemanticException {
      evaluateOperators(w, pctx);
    }

    private void evaluateOperators(BaseWork w, PhysicalContext pctx) throws SemanticException {
      Dispatcher disp = null;
      final Set<TableScanOperator> tableScans = new LinkedHashSet<TableScanOperator>();

      // Walk the operator tree of this work and collect every TableScanOperator.
      Map<Rule, NodeProcessor> rules = new HashMap<Rule, NodeProcessor>();
      rules.put(new RuleRegExp("TS finder", TableScanOperator.getOperatorName() + "%"),
          new NodeProcessor() {
            @Override
            public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
                Object... nodeOutputs) {
              tableScans.add((TableScanOperator) nd);
              return null;
            }
          });

      disp = new DefaultRuleDispatcher(null, rules, null);
      GraphWalker ogw = new DefaultGraphWalker(disp);

      ArrayList<Node> topNodes = new ArrayList<Node>();
      topNodes.addAll(w.getAllRootOperators());

      LinkedHashMap<Node, Object> nodeOutput = new LinkedHashMap<Node, Object>();
      ogw.startWalking(topNodes, nodeOutput);

      // Serialize the pushed-down filter expression and filter object, if present, on each scan.
      for (TableScanOperator ts : tableScans) {
        if (ts.getConf() != null && ts.getConf().getFilterExpr() != null) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Serializing: " + ts.getConf().getFilterExpr().getExprString());
          }
          ts.getConf().setSerializedFilterExpr(
              SerializationUtilities.serializeExpression(ts.getConf().getFilterExpr()));
        }

        if (ts.getConf() != null && ts.getConf().getFilterObject() != null) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Serializing: " + ts.getConf().getFilterObject());
          }
          ts.getConf().setSerializedFilterObject(
              SerializationUtilities.serializeObject(ts.getConf().getFilterObject()));
        }
      }
    }

    public class DefaultRule implements NodeProcessor {
      @Override
      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
          Object... nodeOutputs) throws SemanticException {
        return null;
      }
    }
  }

  @Override
  public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
    pctx.getConf();

    // create dispatcher and graph walker
    Dispatcher disp = new Serializer(pctx);
    TaskGraphWalker ogw = new TaskGraphWalker(disp);

    // get all the tasks nodes from root task
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getRootTasks());

    // begin to walk through the task tree.
    ogw.startWalking(topNodes, null);
    return pctx;
  }
}