/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.plan;

import java.io.ByteArrayOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.parse.OpParseContext;
import org.apache.hadoop.hive.ql.parse.QBJoinTree;
import org.apache.hadoop.hive.ql.parse.SplitSample;

/**
 * MapredWork.
 *
 */
@Explain(displayName = "Map Reduce")
public class MapredWork implements Serializable {
  private static final long serialVersionUID = 1L;
  private String command;

  // map side work
  // use LinkedHashMap to make sure the iteration order is
  // deterministic, to ease testing
  private LinkedHashMap<String, ArrayList<String>> pathToAliases;

  private LinkedHashMap<String, PartitionDesc> pathToPartitionInfo;

  private LinkedHashMap<String, Operator<? extends Serializable>> aliasToWork;

  private LinkedHashMap<String, PartitionDesc> aliasToPartnInfo;

  private HashMap<String, SplitSample> nameToSplitSample;

  // map<->reduce interface
  // schema of the map-reduce 'key' object - this is homogeneous
  private TableDesc keyDesc;

  // schema of the map-reduce 'val' object - this is heterogeneous
  private List<TableDesc> tagToValueDesc;

  private Operator<?> reducer;

  private Integer numReduceTasks;

  private Integer numMapTasks;
  private Long maxSplitSize;
  private Long minSplitSize;
  private Long minSplitSizePerNode;
  private Long minSplitSizePerRack;

  private boolean needsTagging;
  private boolean hadoopSupportsSplittable;

  private MapredLocalWork mapLocalWork;
  private String inputformat;
  private String indexIntermediateFile;
  private boolean gatheringStats;

  private String tmpHDFSFileURI;

  private LinkedHashMap<Operator<? extends Serializable>, OpParseContext> opParseCtxMap;

  private QBJoinTree joinTree;

  private boolean mapperCannotSpanPartns;

  // used to indicate the input is sorted, and so a BinarySearchRecordReader
  // should be used
  private boolean inputFormatSorted = false;

  public MapredWork() {
    aliasToPartnInfo = new LinkedHashMap<String, PartitionDesc>();
  }
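  /**
   * Constructs a fully populated map-reduce plan fragment. Parameter
   * descriptions below are inferred from the corresponding field comments.
   *
   * @param pathToAliases input path -> table aliases that scan it
   * @param pathToPartitionInfo input path -> partition descriptor
   * @param aliasToWork table alias -> root of its map-side operator tree
   * @param keyDesc schema of the map-reduce 'key' object (homogeneous)
   * @param tagToValueDesc per-tag schemas of the map-reduce 'value' object
   * @param reducer root of the reduce-side operator tree (null for map-only jobs)
   */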
  public MapredWork(
      final String command,
      final LinkedHashMap<String, ArrayList<String>> pathToAliases,
      final LinkedHashMap<String, PartitionDesc> pathToPartitionInfo,
      final LinkedHashMap<String, Operator<? extends Serializable>> aliasToWork,
      final TableDesc keyDesc, List<TableDesc> tagToValueDesc,
      final Operator<?> reducer, final Integer numReduceTasks,
      final MapredLocalWork mapLocalWork, final boolean hadoopSupportsSplittable) {
    this.command = command;
    this.pathToAliases = pathToAliases;
    this.pathToPartitionInfo = pathToPartitionInfo;
    this.aliasToWork = aliasToWork;
    this.keyDesc = keyDesc;
    this.tagToValueDesc = tagToValueDesc;
    this.reducer = reducer;
    this.numReduceTasks = numReduceTasks;
    this.mapLocalWork = mapLocalWork;
    aliasToPartnInfo = new LinkedHashMap<String, PartitionDesc>();
    this.hadoopSupportsSplittable = hadoopSupportsSplittable;
    maxSplitSize = null;
    minSplitSize = null;
    minSplitSizePerNode = null;
    minSplitSizePerRack = null;
  }

  public String getCommand() {
    return command;
  }

  public void setCommand(final String command) {
    this.command = command;
  }

  @Explain(displayName = "Path -> Alias", normalExplain = false)
  public LinkedHashMap<String, ArrayList<String>> getPathToAliases() {
    return pathToAliases;
  }

  public void setPathToAliases(
      final LinkedHashMap<String, ArrayList<String>> pathToAliases) {
    this.pathToAliases = pathToAliases;
  }

  @Explain(displayName = "Path -> Partition", normalExplain = false)
  public LinkedHashMap<String, PartitionDesc> getPathToPartitionInfo() {
    return pathToPartitionInfo;
  }

  public void setPathToPartitionInfo(
      final LinkedHashMap<String, PartitionDesc> pathToPartitionInfo) {
    this.pathToPartitionInfo = pathToPartitionInfo;
  }

  /**
   * @return the aliasToPartnInfo
   */
  public LinkedHashMap<String, PartitionDesc> getAliasToPartnInfo() {
    return aliasToPartnInfo;
  }

  /**
   * @param aliasToPartnInfo
   *          the aliasToPartnInfo to set
   */
  public void setAliasToPartnInfo(
      LinkedHashMap<String, PartitionDesc> aliasToPartnInfo) {
    this.aliasToPartnInfo = aliasToPartnInfo;
  }

  @Explain(displayName = "Alias -> Map Operator Tree")
  public LinkedHashMap<String, Operator<? extends Serializable>> getAliasToWork() {
    return aliasToWork;
  }

  public void setAliasToWork(
      final LinkedHashMap<String, Operator<? extends Serializable>> aliasToWork) {
    this.aliasToWork = aliasToWork;
  }

  /**
   * @return the mapredLocalWork
   */
  @Explain(displayName = "Local Work")
  public MapredLocalWork getMapLocalWork() {
    return mapLocalWork;
  }

  /**
   * @param mapLocalWork
   *          the mapredLocalWork to set
   */
  public void setMapLocalWork(final MapredLocalWork mapLocalWork) {
    this.mapLocalWork = mapLocalWork;
  }

  public TableDesc getKeyDesc() {
    return keyDesc;
  }

  public void setKeyDesc(final TableDesc keyDesc) {
    this.keyDesc = keyDesc;
  }

  public List<TableDesc> getTagToValueDesc() {
    return tagToValueDesc;
  }

  public void setTagToValueDesc(final List<TableDesc> tagToValueDesc) {
    this.tagToValueDesc = tagToValueDesc;
  }

  @Explain(displayName = "Reduce Operator Tree")
  public Operator<?> getReducer() {
    return reducer;
  }

  @Explain(displayName = "Percentage Sample")
  public HashMap<String, SplitSample> getNameToSplitSample() {
    return nameToSplitSample;
  }

  public void setNameToSplitSample(HashMap<String, SplitSample> nameToSplitSample) {
    this.nameToSplitSample = nameToSplitSample;
  }

  public void setReducer(final Operator<?> reducer) {
    this.reducer = reducer;
  }

  public Integer getNumMapTasks() {
    return numMapTasks;
  }

  public void setNumMapTasks(Integer numMapTasks) {
    this.numMapTasks = numMapTasks;
  }

  /**
   * If the number of reducers is -1, the runtime will automatically figure it
   * out by input data size.
   *
   * The number of reducers will be a positive number only in case the target
   * table is bucketed into N buckets (through CREATE TABLE). This feature is
   * not supported yet, so the number of reducers will always be -1 for now.
   */
  public Integer getNumReduceTasks() {
    return numReduceTasks;
  }

  public void setNumReduceTasks(final Integer numReduceTasks) {
    this.numReduceTasks = numReduceTasks;
  }
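  // Illustration of the numReduceTasks contract described above (hypothetical
  // caller code, shown for clarity only):
  //
  //   work.setNumReduceTasks(-1); // runtime sizes the reduce phase from the input
  //   work.setNumReduceTasks(0);  // map-only job; the reducer must stay null
  //   work.setNumReduceTasks(4);  // fixed count, e.g. for a 4-bucket target table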
  @SuppressWarnings("nls")
  public void addMapWork(String path, String alias, Operator<?> work,
      PartitionDesc pd) {
    ArrayList<String> curAliases = pathToAliases.get(path);
    if (curAliases == null) {
      assert (pathToPartitionInfo.get(path) == null);
      curAliases = new ArrayList<String>();
      pathToAliases.put(path, curAliases);
      pathToPartitionInfo.put(path, pd);
    } else {
      assert (pathToPartitionInfo.get(path) != null);
    }

    for (String oneAlias : curAliases) {
      if (oneAlias.equals(alias)) {
        throw new RuntimeException("Multiple aliases named: " + alias
            + " for path: " + path);
      }
    }
    curAliases.add(alias);

    if (aliasToWork.get(alias) != null) {
      throw new RuntimeException("Existing work for alias: " + alias);
    }
    aliasToWork.put(alias, work);
  }

  @SuppressWarnings("nls")
  public String isInvalid() {
    if ((getNumReduceTasks() >= 1) && (getReducer() == null)) {
      return "Reducers > 0 but no reduce operator";
    }

    if ((getNumReduceTasks() == 0) && (getReducer() != null)) {
      return "Reducers == 0 but reduce operator specified";
    }

    return null;
  }

  public String toXML() {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    Utilities.serializeMapRedWork(this, baos);
    return (baos.toString());
  }

  // non bean

  /**
   * For each map-side operator, stores the alias the operator is working on
   * behalf of in the operator runtime state. This is used by the reduce sink
   * operator, but could be useful for debugging as well.
   */
  private void setAliases() {
    if (aliasToWork == null) {
      return;
    }
    for (String oneAlias : aliasToWork.keySet()) {
      aliasToWork.get(oneAlias).setAlias(oneAlias);
    }
  }
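  // For instance (hypothetical alias): after initialize() runs, the operator
  // that addMapWork registered under "t1" carries "t1" in its runtime state,
  // where the reduce sink operator (or a debugger) can read it back.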
  /**
   * Derive additional attributes to be rendered by EXPLAIN.
   */
  public void deriveExplainAttributes() {
    if (pathToPartitionInfo != null) {
      for (Map.Entry<String, PartitionDesc> entry : pathToPartitionInfo
          .entrySet()) {
        entry.getValue().deriveBaseFileName(entry.getKey());
      }
    }
    if (mapLocalWork != null) {
      mapLocalWork.deriveExplainAttributes();
    }
  }

  public void initialize() {
    setAliases();
  }

  @Explain(displayName = "Needs Tagging", normalExplain = false)
  public boolean getNeedsTagging() {
    return needsTagging;
  }

  public void setNeedsTagging(boolean needsTagging) {
    this.needsTagging = needsTagging;
  }

  public boolean getHadoopSupportsSplittable() {
    return hadoopSupportsSplittable;
  }

  public void setHadoopSupportsSplittable(boolean hadoopSupportsSplittable) {
    this.hadoopSupportsSplittable = hadoopSupportsSplittable;
  }

  public Long getMaxSplitSize() {
    return maxSplitSize;
  }

  public void setMaxSplitSize(Long maxSplitSize) {
    this.maxSplitSize = maxSplitSize;
  }

  public Long getMinSplitSize() {
    return minSplitSize;
  }

  public void setMinSplitSize(Long minSplitSize) {
    this.minSplitSize = minSplitSize;
  }

  public Long getMinSplitSizePerNode() {
    return minSplitSizePerNode;
  }

  public void setMinSplitSizePerNode(Long minSplitSizePerNode) {
    this.minSplitSizePerNode = minSplitSizePerNode;
  }

  public Long getMinSplitSizePerRack() {
    return minSplitSizePerRack;
  }

  public void setMinSplitSizePerRack(Long minSplitSizePerRack) {
    this.minSplitSizePerRack = minSplitSizePerRack;
  }

  public String getInputformat() {
    return inputformat;
  }

  public void setInputformat(String inputformat) {
    this.inputformat = inputformat;
  }

  public String getIndexIntermediateFile() {
    return indexIntermediateFile;
  }

  public void addIndexIntermediateFile(String fileName) {
    if (this.indexIntermediateFile == null) {
      this.indexIntermediateFile = fileName;
    } else {
      this.indexIntermediateFile += "," + fileName;
    }
  }

  public void setGatheringStats(boolean gatherStats) {
    this.gatheringStats = gatherStats;
  }

  public boolean isGatheringStats() {
    return this.gatheringStats;
  }

  public void setMapperCannotSpanPartns(boolean mapperCannotSpanPartns) {
    this.mapperCannotSpanPartns = mapperCannotSpanPartns;
  }

  public boolean isMapperCannotSpanPartns() {
    return this.mapperCannotSpanPartns;
  }

  public String getTmpHDFSFileURI() {
    return tmpHDFSFileURI;
  }

  public void setTmpHDFSFileURI(String tmpHDFSFileURI) {
    this.tmpHDFSFileURI = tmpHDFSFileURI;
  }
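  // The accessor pairs below expose parse-time structures (the QBJoinTree and
  // per-operator parse contexts) that travel with the compiled plan, so that
  // downstream optimizers can consult them when rewriting the plan.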
  public QBJoinTree getJoinTree() {
    return joinTree;
  }

  public void setJoinTree(QBJoinTree joinTree) {
    this.joinTree = joinTree;
  }

  public LinkedHashMap<Operator<? extends Serializable>, OpParseContext> getOpParseCtxMap() {
    return opParseCtxMap;
  }

  public void setOpParseCtxMap(
      LinkedHashMap<Operator<? extends Serializable>, OpParseContext> opParseCtxMap) {
    this.opParseCtxMap = opParseCtxMap;
  }

  public boolean isInputFormatSorted() {
    return inputFormatSorted;
  }

  public void setInputFormatSorted(boolean inputFormatSorted) {
    this.inputFormatSorted = inputFormatSorted;
  }

  public void resolveDynamicPartitionMerge(HiveConf conf, Path path,
      TableDesc tblDesc, ArrayList<String> aliases, PartitionDesc partDesc) {
    pathToAliases.put(path.toString(), aliases);
    pathToPartitionInfo.put(path.toString(), partDesc);
  }

  public List<Operator<?>> getAllOperators() {
    ArrayList<Operator<?>> opList = new ArrayList<Operator<?>>();
    ArrayList<Operator<?>> returnList = new ArrayList<Operator<?>>();

    // seed the worklist with the reducer (if any) and every map-side root
    if (getReducer() != null) {
      opList.add(getReducer());
    }

    Map<String, ArrayList<String>> pa = getPathToAliases();
    if (pa != null) {
      for (List<String> ls : pa.values()) {
        for (String a : ls) {
          Operator<?> op = getAliasToWork().get(a);
          if (op != null) {
            opList.add(op);
          }
        }
      }
    }

    // recursively add all children: a breadth-first walk of the operator tree.
    // Note that no visited set is kept, so an operator reachable through
    // multiple parents will appear once per path.
    while (!opList.isEmpty()) {
      Operator<?> op = opList.remove(0);
      if (op.getChildOperators() != null) {
        opList.addAll(op.getChildOperators());
      }
      returnList.add(op);
    }

    return returnList;
  }
}
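// Usage sketch (hypothetical caller, not part of this class): getAllOperators
// makes it easy to inspect every operator in a plan fragment, e.g.:
//
//   MapredWork work = ...;
//   for (Operator<?> op : work.getAllOperators()) {
//     System.out.println(op.getName());
//   }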