/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.plan;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason;
import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hive.common.util.ReflectionUtil;

/**
 * ReduceWork represents all the information used to run a reduce task on the cluster.
 * It is first used when the query planner breaks the logical plan into tasks and
 * is used throughout physical optimization to track reduce-side operator plans,
 * schema info about key/value pairs, etc.
 *
 * ExecDriver will serialize the contents of this class and make sure it is
 * distributed on the cluster. The ExecReducer will ultimately deserialize this
 * class on the data nodes and set up its operator pipeline accordingly.
 *
 * This class is also used in the explain command: any property with the
 * appropriate annotation will be displayed in the explain output.
 */
@SuppressWarnings({"serial", "deprecation"})
public class ReduceWork extends BaseWork {

  public ReduceWork() {}

  public ReduceWork(String name) {
    super(name);
  }

  // schema of the map-reduce 'key' object - this is homogeneous
  private TableDesc keyDesc;

  // schema of the map-reduce 'value' object - this is heterogeneous
  private List<TableDesc> tagToValueDesc = new ArrayList<TableDesc>();

  // first operator of the reduce task. (not the reducesinkoperator, but the
  // operator that handles the output of these, e.g.: JoinOperator).
  private Operator<?> reducer;
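
  // Illustrative sketch (editorial, not part of the original source): how a
  // planner might wire up a ReduceWork for a reduce-side join. The operator
  // "joinOp" and the work name "Reducer 2" are hypothetical; all methods
  // called are defined in this class.
  //
  //   ReduceWork rw = new ReduceWork("Reducer 2");
  //   rw.setKeyDesc(keyTableDesc);       // keySerializeInfo of the ReduceSink
  //   rw.getTagToValueDesc().add(valueTableDesc);
  //   rw.setReducer(joinOp);             // e.g. a JoinOperator
  //   rw.setNeedsTagging(true);          // joins read tagged rows
  //   rw.setNumReduceTasks(-1);          // let the runtime decide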
  // desired parallelism of the reduce task.
  private Integer numReduceTasks;

  // boolean to signal whether tagging will be used (e.g.: join) or
  // not (e.g.: group by)
  private boolean needsTagging;

  private Map<Integer, String> tagToInput = new HashMap<Integer, String>();

  // boolean that says whether tez auto reduce parallelism should be used
  private boolean isAutoReduceParallelism;

  // boolean that says whether the data distribution is uniform hash (not java HashCode)
  private transient boolean isUniformDistribution = false;

  // boolean that says whether to slow start or not
  private boolean isSlowStart = true;

  // for auto reduce parallelism - minimum reducers requested
  private int minReduceTasks;

  // for auto reduce parallelism - max reducers requested
  private int maxReduceTasks;

  private ObjectInspector keyObjectInspector = null;
  private ObjectInspector valueObjectInspector = null;

  private boolean reduceVectorizationEnabled;
  private String vectorReduceEngine;

  private String vectorReduceColumnSortOrder;
  private String vectorReduceColumnNullOrder;

  private transient TezEdgeProperty edgeProp;

  /**
   * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing
   * to keySerializeInfo of the ReduceSink.
   *
   * @param keyDesc
   */
  public void setKeyDesc(final TableDesc keyDesc) {
    this.keyDesc = keyDesc;
  }

  public TableDesc getKeyDesc() {
    return keyDesc;
  }

  public List<TableDesc> getTagToValueDesc() {
    return tagToValueDesc;
  }

  public void setTagToValueDesc(final List<TableDesc> tagToValueDesc) {
    this.tagToValueDesc = tagToValueDesc;
  }

  @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED },
      vectorization = Vectorization.SUMMARY_PATH)
  public String getExecutionMode() {
    if (vectorMode) {
      if (llapMode) {
        if (uberMode) {
          return "vectorized, uber";
        } else {
          return "vectorized, llap";
        }
      } else {
        return "vectorized";
      }
    } else if (llapMode) {
      return uberMode ? "uber" : "llap";
    }
    return null;
  }

  @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED },
      vectorization = Vectorization.OPERATOR_PATH)
  public Operator<?> getReducer() {
    return reducer;
  }

  public void setReducer(final Operator<?> reducer) {
    this.reducer = reducer;
  }

  @Explain(displayName = "Needs Tagging", explainLevels = { Level.EXTENDED })
  public boolean getNeedsTagging() {
    return needsTagging;
  }

  public void setNeedsTagging(boolean needsTagging) {
    this.needsTagging = needsTagging;
  }

  public void setTagToInput(final Map<Integer, String> tagToInput) {
    this.tagToInput = tagToInput;
  }

  @Explain(displayName = "tagToInput", explainLevels = { Level.USER })
  public Map<Integer, String> getTagToInput() {
    return tagToInput;
  }

  @Override
  public void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap) {
    setReducer(replacementMap.get(getReducer()));
  }

  @Override
  public Set<Operator<?>> getAllRootOperators() {
    Set<Operator<?>> opSet = new LinkedHashSet<Operator<?>>();
    opSet.add(getReducer());
    return opSet;
  }

  @Override
  public Operator<? extends OperatorDesc> getAnyRootOperator() {
    return getReducer();
  }
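
  // Illustrative sketch (editorial): for a two-way join, the tag on each
  // reduce input row identifies which parent work produced it. The work names
  // "Map 1" and "Map 3" are hypothetical, and "rw" is the hypothetical
  // ReduceWork from the sketch near the top of this class.
  //
  //   Map<Integer, String> tagToInput = new HashMap<Integer, String>();
  //   tagToInput.put(0, "Map 1");   // rows tagged 0 come from the left side
  //   tagToInput.put(1, "Map 3");   // rows tagged 1 come from the right side
  //   rw.setTagToInput(tagToInput);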

  /**
   * If the number of reducers is -1, the runtime will automatically figure it
   * out by input data size.
   *
   * The number of reducers will be a positive number only in case the target
   * table is bucketed into N buckets (through CREATE TABLE). This feature is
   * not supported yet, so the number of reducers will always be -1 for now.
   */
  public Integer getNumReduceTasks() {
    return numReduceTasks;
  }

  public void setNumReduceTasks(final Integer numReduceTasks) {
    this.numReduceTasks = numReduceTasks;
  }

  @Override
  public void configureJobConf(JobConf job) {
    if (reducer != null) {
      for (FileSinkOperator fs : OperatorUtils.findOperators(reducer, FileSinkOperator.class)) {
        PlanUtils.configureJobConf(fs.getConf().getTableInfo(), job);
      }
    }
  }

  public void setAutoReduceParallelism(boolean isAutoReduceParallelism) {
    this.isAutoReduceParallelism = isAutoReduceParallelism;
  }

  public boolean isAutoReduceParallelism() {
    return isAutoReduceParallelism;
  }

  public boolean isSlowStart() {
    return isSlowStart;
  }

  public void setSlowStart(boolean isSlowStart) {
    this.isSlowStart = isSlowStart;
  }

  // ReducerTraits.UNIFORM
  public void setUniformDistribution(boolean isUniformDistribution) {
    this.isUniformDistribution = isUniformDistribution;
  }

  public boolean isUniformDistribution() {
    return this.isUniformDistribution;
  }

  public void setMinReduceTasks(int minReduceTasks) {
    this.minReduceTasks = minReduceTasks;
  }

  public int getMinReduceTasks() {
    return minReduceTasks;
  }

  public int getMaxReduceTasks() {
    return maxReduceTasks;
  }

  public void setMaxReduceTasks(int maxReduceTasks) {
    this.maxReduceTasks = maxReduceTasks;
  }

  public void setReduceVectorizationEnabled(boolean reduceVectorizationEnabled) {
    this.reduceVectorizationEnabled = reduceVectorizationEnabled;
  }

  public boolean getReduceVectorizationEnabled() {
    return reduceVectorizationEnabled;
  }

  public void setVectorReduceEngine(String vectorReduceEngine) {
    this.vectorReduceEngine = vectorReduceEngine;
  }

  public String getVectorReduceEngine() {
    return vectorReduceEngine;
  }

  public void setVectorReduceColumnSortOrder(String vectorReduceColumnSortOrder) {
    this.vectorReduceColumnSortOrder = vectorReduceColumnSortOrder;
  }

  public String getVectorReduceColumnSortOrder() {
    return vectorReduceColumnSortOrder;
  }

  public void setVectorReduceColumnNullOrder(String vectorReduceColumnNullOrder) {
    this.vectorReduceColumnNullOrder = vectorReduceColumnNullOrder;
  }

  public String getVectorReduceColumnNullOrder() {
    return vectorReduceColumnNullOrder;
  }
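
  // Illustrative sketch (editorial): with auto reduce parallelism, the runtime
  // may pick the final reducer count between the requested minimum and maximum.
  // The bounds below are hypothetical, and "rw" is the hypothetical ReduceWork
  // from the earlier sketch.
  //
  //   rw.setAutoReduceParallelism(true);
  //   rw.setMinReduceTasks(2);     // do not shrink below 2 reducers
  //   rw.setMaxReduceTasks(100);   // do not grow beyond 100 reducers
  //   rw.setSlowStart(true);       // ramp reducers up as upstream tasks finish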
  // Use LinkedHashSet to give predictable display order.
  private static Set<String> reduceVectorizableEngines =
      new LinkedHashSet<String>(Arrays.asList("tez", "spark"));

  public class ReduceExplainVectorization extends BaseExplainVectorization {

    private final ReduceWork reduceWork;

    private VectorizationCondition[] reduceVectorizationConditions;

    public ReduceExplainVectorization(ReduceWork reduceWork) {
      super(reduceWork);
      this.reduceWork = reduceWork;
    }

    private VectorizationCondition[] createReduceExplainVectorizationConditions() {

      boolean enabled = reduceWork.getReduceVectorizationEnabled();

      String engine = reduceWork.getVectorReduceEngine();
      String engineInSupportedCondName =
          HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + reduceVectorizableEngines;
      boolean engineInSupported = reduceVectorizableEngines.contains(engine);

      VectorizationCondition[] conditions = new VectorizationCondition[] {
          new VectorizationCondition(
              enabled,
              HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED.varname),
          new VectorizationCondition(
              engineInSupported,
              engineInSupportedCondName)
      };
      return conditions;
    }

    @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsMet",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public List<String> getEnableConditionsMet() {
      if (reduceVectorizationConditions == null) {
        reduceVectorizationConditions = createReduceExplainVectorizationConditions();
      }
      return VectorizationCondition.getConditionsMet(reduceVectorizationConditions);
    }

    @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsNotMet",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public List<String> getEnableConditionsNotMet() {
      if (reduceVectorizationConditions == null) {
        reduceVectorizationConditions = createReduceExplainVectorizationConditions();
      }
      return VectorizationCondition.getConditionsNotMet(reduceVectorizationConditions);
    }

    @Explain(vectorization = Vectorization.DETAIL, displayName = "reduceColumnSortOrder",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public String getReduceColumnSortOrder() {
      if (!getVectorizationExamined()) {
        return null;
      }
      return reduceWork.getVectorReduceColumnSortOrder();
    }

    @Explain(vectorization = Vectorization.DETAIL, displayName = "reduceColumnNullOrder",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public String getReduceColumnNullOrder() {
      if (!getVectorizationExamined()) {
        return null;
      }
      return reduceWork.getVectorReduceColumnNullOrder();
    }
  }

  @Explain(vectorization = Vectorization.SUMMARY, displayName = "Reduce Vectorization",
      explainLevels = { Level.DEFAULT, Level.EXTENDED })
  public ReduceExplainVectorization getReduceExplainVectorization() {
    if (!getVectorizationExamined()) {
      return null;
    }
    return new ReduceExplainVectorization(this);
  }

  public void setEdgePropRef(TezEdgeProperty edgeProp) {
    this.edgeProp = edgeProp;
  }

  public TezEdgeProperty getEdgePropRef() {
    return edgeProp;
  }
}
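
// Illustrative note (editorial): with hive.vectorization.reduce.enabled=true
// and hive.execution.engine=tez, both VectorizationCondition checks built in
// createReduceExplainVectorizationConditions() pass, so EXPLAIN VECTORIZATION
// would list them under "enableConditionsMet"; with an engine outside
// {tez, spark} the engine check would instead appear under
// "enableConditionsNotMet". The exact rendering is determined by the Explain
// annotations and the explain output layer, not by this class.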