/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.plan;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.mapred.JobConf;

/**
 * Work descriptor for a sorted-merge join. It bundles the big-table work
 * (the "main" work) with the merged small-table work items, and delegates
 * most {@link BaseWork} operations to the main work.
 */
public class MergeJoinWork extends BaseWork {

  private CommonMergeJoinOperator mergeJoinOp = null;
  private final List<BaseWork> mergeWorkList = new ArrayList<BaseWork>();
  private BaseWork bigTableWork;

  public MergeJoinWork() {
    super();
  }

  @Override
  public void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap) {
    getMainWork().replaceRoots(replacementMap);
  }

  @Override
  public Set<Operator<?>> getAllRootOperators() {
    // Root operators come from the main work plus every merged work item.
    Set<Operator<?>> set = new HashSet<>();
    set.addAll(getMainWork().getAllRootOperators());
    for (BaseWork w : mergeWorkList) {
      set.addAll(w.getAllRootOperators());
    }
    return set;
  }

  @Override
  public Operator<?> getAnyRootOperator() {
    return getMainWork().getAnyRootOperator();
  }

  @Override
  public void configureJobConf(JobConf job) {
    // No job configuration needed for merge join work.
  }

  public CommonMergeJoinOperator getMergeJoinOperator() {
    return this.mergeJoinOp;
  }

  public void setMergeJoinOperator(CommonMergeJoinOperator mergeJoinOp) {
    this.mergeJoinOp = mergeJoinOp;
  }

  public void addMergedWork(BaseWork work, BaseWork connectWork,
      Map<Operator<?>, BaseWork> leafOperatorToFollowingWork) {
    if (work != null) {
      if ((bigTableWork != null) && (bigTableWork != work)) {
        // There can be at most one big-table work per merge join.
        assert false;
      }
      this.bigTableWork = work;
      setName(work.getName());
    }

    if (connectWork != null) {
      this.mergeWorkList.add(connectWork);
      if ((connectWork instanceof ReduceWork) && (bigTableWork != null)) {
        /*
         * For tez to route data from an up-stream vertex correctly to the following vertex, the
         * output name in the reduce sink needs to be set up appropriately. In the case of reduce
         * side merge work, we need to ensure that the parent work that provides data to this merge
         * work is set up to point to the right vertex name - the main work name.
         *
         * In this case, if the big table work has already been created, we can hook up the merge
         * work items for the small table correctly.
         */
        setReduceSinkOutputName(connectWork, leafOperatorToFollowingWork, bigTableWork.getName());
      }
    }

    if (work != null) {
      /*
       * Same reason as above. This is the case when we have the main work item after the merge
       * work has been created for the small table side.
       */
      for (BaseWork mergeWork : mergeWorkList) {
        if (mergeWork instanceof ReduceWork) {
          setReduceSinkOutputName(mergeWork, leafOperatorToFollowingWork, work.getName());
        }
      }
    }
  }

  private void setReduceSinkOutputName(BaseWork mergeWork,
      Map<Operator<?>, BaseWork> leafOperatorToFollowingWork, String name) {
    // Point every reduce sink feeding this merge work at the given vertex name.
    for (Entry<Operator<?>, BaseWork> entry : leafOperatorToFollowingWork.entrySet()) {
      if (entry.getValue() == mergeWork) {
        ((ReduceSinkOperator) entry.getKey()).getConf().setOutputName(name);
      }
    }
  }

  @Explain(skipHeader = true, displayName = "Join", explainLevels = { Level.USER, Level.DEFAULT,
      Level.EXTENDED })
  public List<BaseWork> getBaseWorkList() {
    return mergeWorkList;
  }

  public String getBigTableAlias() {
    return ((MapWork) bigTableWork).getAliasToWork().keySet().iterator().next();
  }

  @Explain(skipHeader = true, displayName = "Main", explainLevels = { Level.USER, Level.DEFAULT,
      Level.EXTENDED })
  public BaseWork getMainWork() {
    return bigTableWork;
  }

  @Override
  public void setDummyOps(List<HashTableDummyOperator> dummyOps) {
    getMainWork().setDummyOps(dummyOps);
  }

  @Override
  public List<HashTableDummyOperator> getDummyOps() {
    return getMainWork().getDummyOps();
  }

  @Override
  public void setVectorMode(boolean vectorMode) {
    getMainWork().setVectorMode(vectorMode);
  }

  @Override
  public boolean getVectorMode() {
    return getMainWork().getVectorMode();
  }

  @Override
  public void setUberMode(boolean uberMode) {
    getMainWork().setUberMode(uberMode);
  }

  @Override
  public boolean getUberMode() {
    return getMainWork().getUberMode();
  }

  @Override
  public void setLlapMode(boolean llapMode) {
    getMainWork().setLlapMode(llapMode);
  }

  @Override
  public boolean getLlapMode() {
    return getMainWork().getLlapMode();
  }

  public void addDummyOp(HashTableDummyOperator dummyOp) {
    getMainWork().addDummyOp(dummyOp);
  }
}
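
/*
 * Illustrative usage sketch (kept as a comment so the file stays compilable;
 * not part of the original source). During Tez plan generation, a
 * MergeJoinWork is assumed to be populated roughly like this. The variable
 * names below (bigTableMapWork, smallTableWork, leafOpToFollowingWork) are
 * hypothetical placeholders, not Hive APIs.
 *
 *   MergeJoinWork mergeJoinWork = new MergeJoinWork();
 *   mergeJoinWork.setMergeJoinOperator(mergeJoinOp);
 *
 *   // Register the big-table side first; this names the main work/vertex.
 *   mergeJoinWork.addMergedWork(bigTableMapWork, null, leafOpToFollowingWork);
 *
 *   // Then attach each small-table side; ReduceWork inputs get their
 *   // reduce-sink output name pointed at the main work's vertex name.
 *   mergeJoinWork.addMergedWork(null, smallTableWork, leafOpToFollowingWork);
 *
 * Calling addMergedWork in the opposite order also works: the second branch
 * of addMergedWork re-walks mergeWorkList and fixes up the output names once
 * the big-table work arrives.
 */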