/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.plan;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason;
import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hive.common.util.ReflectionUtil;
/**
* ReduceWork represents all the information used to run a reduce task on the cluster.
* It is first used when the query planner breaks the logical plan into tasks and
 * used throughout physical optimization to track reduce-side operator plans, schema
 * info about key/value pairs, etc.
*
 * ExecDriver will serialize the contents of this class and make sure it is
 * distributed on the cluster. The ExecReducer will ultimately deserialize this
 * class on the data nodes and set up its operator pipeline accordingly.
*
 * This class is also used in the explain command; any property with the
 * appropriate annotation will be displayed in the explain output.
*/
@SuppressWarnings({"serial", "deprecation"})
public class ReduceWork extends BaseWork {

  /**
   * Execution engines for which reduce-side vectorization is supported.
   * Use LinkedHashSet to give predictable display order in EXPLAIN output.
   */
  private static final Set<String> REDUCE_VECTORIZABLE_ENGINES =
      new LinkedHashSet<String>(Arrays.asList("tez", "spark"));

  // schema of the map-reduce 'key' object - this is homogeneous
  private TableDesc keyDesc;

  // schema of the map-reduce 'value' object - this is heterogeneous
  // (one TableDesc per tag)
  private List<TableDesc> tagToValueDesc = new ArrayList<TableDesc>();

  // first operator of the reduce task. (not the reducesinkoperator, but the
  // operator that handles the output of these, e.g.: JoinOperator).
  private Operator<?> reducer;

  // desired parallelism of the reduce task.
  private Integer numReduceTasks;

  // boolean to signal whether tagging will be used (e.g.: join) or
  // not (e.g.: group by)
  private boolean needsTagging;

  // maps each tag to the name of the input work feeding it
  private Map<Integer, String> tagToInput = new HashMap<Integer, String>();

  // boolean that says whether tez auto reduce parallelism should be used
  private boolean isAutoReduceParallelism;

  // boolean that says whether the data distribution is uniform hash (not java HashCode)
  private transient boolean isUniformDistribution = false;

  // boolean that says whether to slow start or not
  private boolean isSlowStart = true;

  // for auto reduce parallelism - minimum reducers requested
  private int minReduceTasks;

  // for auto reduce parallelism - max reducers requested
  private int maxReduceTasks;

  // NOTE(review): these two inspectors are never read or written within this
  // class — confirm there is no reflective/serialized use before removing.
  private ObjectInspector keyObjectInspector = null;
  private ObjectInspector valueObjectInspector = null;

  // whether reduce-side vectorization was enabled by configuration
  private boolean reduceVectorizationEnabled;

  // execution engine name captured when the vectorization decision was made
  private String vectorReduceEngine;

  private String vectorReduceColumnSortOrder;

  private String vectorReduceColumnNullOrder;

  private transient TezEdgeProperty edgeProp;

  public ReduceWork() {}

  public ReduceWork(String name) {
    super(name);
  }

  /**
   * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing
   * to keySerializeInfo of the ReduceSink
   *
   * @param keyDesc
   */
  public void setKeyDesc(final TableDesc keyDesc) {
    this.keyDesc = keyDesc;
  }

  public TableDesc getKeyDesc() {
    return keyDesc;
  }

  public List<TableDesc> getTagToValueDesc() {
    return tagToValueDesc;
  }

  public void setTagToValueDesc(final List<TableDesc> tagToValueDesc) {
    this.tagToValueDesc = tagToValueDesc;
  }

  /**
   * Returns a human-readable description of the execution mode for EXPLAIN
   * output ("vectorized", "llap", "uber" or a combination), or null when
   * none of those modes apply.
   */
  @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED },
      vectorization = Vectorization.SUMMARY_PATH)
  public String getExecutionMode() {
    if (vectorMode) {
      if (llapMode) {
        return uberMode ? "vectorized, uber" : "vectorized, llap";
      }
      return "vectorized";
    }
    if (llapMode) {
      return uberMode ? "uber" : "llap";
    }
    return null;
  }

  @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED },
      vectorization = Vectorization.OPERATOR_PATH)
  public Operator<?> getReducer() {
    return reducer;
  }

  public void setReducer(final Operator<?> reducer) {
    this.reducer = reducer;
  }

  @Explain(displayName = "Needs Tagging", explainLevels = { Level.EXTENDED })
  public boolean getNeedsTagging() {
    return needsTagging;
  }

  public void setNeedsTagging(boolean needsTagging) {
    this.needsTagging = needsTagging;
  }

  public void setTagToInput(final Map<Integer, String> tagToInput) {
    this.tagToInput = tagToInput;
  }

  @Explain(displayName = "tagToInput", explainLevels = { Level.USER })
  public Map<Integer, String> getTagToInput() {
    return tagToInput;
  }

  @Override
  public void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap) {
    // the reducer is the single root of a reduce-side plan
    setReducer(replacementMap.get(getReducer()));
  }

  @Override
  public Set<Operator<?>> getAllRootOperators() {
    Set<Operator<?>> opSet = new LinkedHashSet<Operator<?>>();
    opSet.add(getReducer());
    return opSet;
  }

  @Override
  public Operator<? extends OperatorDesc> getAnyRootOperator() {
    return getReducer();
  }

  /**
   * If the number of reducers is -1, the runtime will automatically figure it
   * out by input data size.
   *
   * The number of reducers will be a positive number only in case the target
   * table is bucketed into N buckets (through CREATE TABLE). This feature is
   * not supported yet, so the number of reducers will always be -1 for now.
   */
  public Integer getNumReduceTasks() {
    return numReduceTasks;
  }

  public void setNumReduceTasks(final Integer numReduceTasks) {
    this.numReduceTasks = numReduceTasks;
  }

  /**
   * Pushes table-level configuration of every FileSink below the reducer
   * into the job configuration.
   */
  @Override
  public void configureJobConf(JobConf job) {
    if (reducer != null) {
      for (FileSinkOperator fs : OperatorUtils.findOperators(reducer, FileSinkOperator.class)) {
        PlanUtils.configureJobConf(fs.getConf().getTableInfo(), job);
      }
    }
  }

  public void setAutoReduceParallelism(boolean isAutoReduceParallelism) {
    this.isAutoReduceParallelism = isAutoReduceParallelism;
  }

  public boolean isAutoReduceParallelism() {
    return isAutoReduceParallelism;
  }

  public boolean isSlowStart() {
    return isSlowStart;
  }

  public void setSlowStart(boolean isSlowStart) {
    this.isSlowStart = isSlowStart;
  }

  // ReducerTraits.UNIFORM
  public void setUniformDistribution(boolean isUniformDistribution) {
    this.isUniformDistribution = isUniformDistribution;
  }

  public boolean isUniformDistribution() {
    return this.isUniformDistribution;
  }

  public void setMinReduceTasks(int minReduceTasks) {
    this.minReduceTasks = minReduceTasks;
  }

  public int getMinReduceTasks() {
    return minReduceTasks;
  }

  public int getMaxReduceTasks() {
    return maxReduceTasks;
  }

  public void setMaxReduceTasks(int maxReduceTasks) {
    this.maxReduceTasks = maxReduceTasks;
  }

  public void setReduceVectorizationEnabled(boolean reduceVectorizationEnabled) {
    this.reduceVectorizationEnabled = reduceVectorizationEnabled;
  }

  public boolean getReduceVectorizationEnabled() {
    return reduceVectorizationEnabled;
  }

  public void setVectorReduceEngine(String vectorReduceEngine) {
    this.vectorReduceEngine = vectorReduceEngine;
  }

  public String getVectorReduceEngine() {
    return vectorReduceEngine;
  }

  public void setVectorReduceColumnSortOrder(String vectorReduceColumnSortOrder) {
    this.vectorReduceColumnSortOrder = vectorReduceColumnSortOrder;
  }

  public String getVectorReduceColumnSortOrder() {
    return vectorReduceColumnSortOrder;
  }

  public void setVectorReduceColumnNullOrder(String vectorReduceColumnNullOrder) {
    this.vectorReduceColumnNullOrder = vectorReduceColumnNullOrder;
  }

  public String getVectorReduceColumnNullOrder() {
    return vectorReduceColumnNullOrder;
  }

  /**
   * EXPLAIN VECTORIZATION view of this reduce work: reports which
   * vectorization-enabling conditions were met or not met, plus the
   * reduce-side column sort/null orders once vectorization was examined.
   */
  public class ReduceExplainVectorization extends BaseExplainVectorization {

    private final ReduceWork reduceWork;

    // lazily built; cached so both conditions-met/not-met getters share it
    private VectorizationCondition[] reduceVectorizationConditions;

    public ReduceExplainVectorization(ReduceWork reduceWork) {
      super(reduceWork);
      this.reduceWork = reduceWork;
    }

    private VectorizationCondition[] createReduceExplainVectorizationConditions() {
      boolean enabled = reduceWork.getReduceVectorizationEnabled();
      String engine = reduceWork.getVectorReduceEngine();
      String engineInSupportedCondName =
          HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + REDUCE_VECTORIZABLE_ENGINES;
      boolean engineInSupported = REDUCE_VECTORIZABLE_ENGINES.contains(engine);
      VectorizationCondition[] conditions = new VectorizationCondition[] {
          new VectorizationCondition(
              enabled,
              HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED.varname),
          new VectorizationCondition(
              engineInSupported,
              engineInSupportedCondName)
      };
      return conditions;
    }

    @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public List<String> getEnableConditionsMet() {
      if (reduceVectorizationConditions == null) {
        reduceVectorizationConditions = createReduceExplainVectorizationConditions();
      }
      return VectorizationCondition.getConditionsMet(reduceVectorizationConditions);
    }

    @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public List<String> getEnableConditionsNotMet() {
      if (reduceVectorizationConditions == null) {
        reduceVectorizationConditions = createReduceExplainVectorizationConditions();
      }
      return VectorizationCondition.getConditionsNotMet(reduceVectorizationConditions);
    }

    @Explain(vectorization = Vectorization.DETAIL, displayName = "reduceColumnSortOrder", explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public String getReduceColumnSortOrder() {
      if (!getVectorizationExamined()) {
        return null;
      }
      return reduceWork.getVectorReduceColumnSortOrder();
    }

    @Explain(vectorization = Vectorization.DETAIL, displayName = "reduceColumnNullOrder", explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public String getReduceColumnNullOrder() {
      if (!getVectorizationExamined()) {
        return null;
      }
      return reduceWork.getVectorReduceColumnNullOrder();
    }
  }

  @Explain(vectorization = Vectorization.SUMMARY, displayName = "Reduce Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
  public ReduceExplainVectorization getReduceExplainVectorization() {
    if (!getVectorizationExamined()) {
      return null;
    }
    return new ReduceExplainVectorization(this);
  }

  public void setEdgePropRef(TezEdgeProperty edgeProp) {
    this.edgeProp = edgeProp;
  }

  public TezEdgeProperty getEdgePropRef() {
    return edgeProp;
  }
}