/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.UnionDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * SkewJoinOptimizer.
 *
 * Rewrites a join over skewed tables into a union of two joins: one branch
 * processes only the rows with skewed key values, the other processes the rest.
 */
public class SkewJoinOptimizer extends Transform {

  private static final Logger LOG = LoggerFactory.getLogger(SkewJoinOptimizer.class.getName());

  public static class SkewJoinProc implements NodeProcessor {
    private ParseContext parseContext;

    public SkewJoinProc(ParseContext parseContext) {
      super();
      this.parseContext = parseContext;
    }

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      // We expect a tree that looks like this:
      //   TS -> * -> RS -
      //                  \
      //                   -> JOIN -> ..
      //                  /
      //   TS -> * -> RS -
      //
      // We are in the join operator now.

      SkewJoinOptProcCtx ctx = (SkewJoinOptProcCtx) procCtx;
      parseContext = ctx.getpGraphContext();

      JoinOperator joinOp = (JoinOperator) nd;
      // This join has already been processed
      if (ctx.getDoneJoins().contains(joinOp)) {
        return null;
      }

      ctx.getDoneJoins().add(joinOp);

      Operator<? extends OperatorDesc> currOp = joinOp;
      boolean processSelect = false;

      // Is there a select following the join?
      // Clone the select also. It is useful for a follow-on optimization where the union
      // followed by a select star is completely removed.
      if ((joinOp.getChildOperators().size() == 1) &&
          (joinOp.getChildOperators().get(0) instanceof SelectOperator)) {
        currOp = joinOp.getChildOperators().get(0);
        processSelect = true;
      }

      List<TableScanOperator> tableScanOpsForJoin = new ArrayList<TableScanOperator>();
      if (!getTableScanOpsForJoin(joinOp, tableScanOpsForJoin)) {
        return null;
      }

      if ((tableScanOpsForJoin == null) || (tableScanOpsForJoin.isEmpty())) {
        return null;
      }

      // Get the skewed values in all the tables
      Map<List<ExprNodeDesc>, List<List<String>>> skewedValues =
          getSkewedValues(joinOp, tableScanOpsForJoin);

      // If there are no skewed values, nothing needs to be done
      if (skewedValues == null || skewedValues.size() == 0) {
        return null;
      }

      // After this optimization, the tree should look like:
      //
      //  TS -> (FIL "skewed rows") * -> RS -
      //                                     \
      //                                      -> JOIN
      //                                     /        \
      //  TS -> (FIL "skewed rows") * -> RS -          \
      //                                                \
      //                                                 -> UNION -> ..
      //                                                /
      //  TS -> (FIL "no skewed rows") * -> RS -       /
      //                                        \     /
      //                                         -> JOIN
      //                                        /
      //  TS -> (FIL "no skewed rows") * -> RS -

      // Create a clone of the operator
      Operator<? extends OperatorDesc> currOpClone;
      try {
        currOpClone = currOp.clone();
        insertRowResolvers(currOp, currOpClone, ctx);
      } catch (CloneNotSupportedException e) {
        LOG.debug("Operator tree could not be cloned");
        return null;
      }

      JoinOperator joinOpClone;
      if (processSelect) {
        joinOpClone = (JoinOperator) (currOpClone.getParentOperators().get(0));
      } else {
        joinOpClone = (JoinOperator) currOpClone;
      }
      joinOpClone.getConf().cloneQBJoinTreeProps(joinOp.getConf());
      parseContext.getJoinOps().add(joinOpClone);

      List<TableScanOperator> tableScanCloneOpsForJoin = new ArrayList<TableScanOperator>();
      if (!getTableScanOpsForJoin(joinOpClone, tableScanCloneOpsForJoin)) {
        LOG.debug("Operator tree not properly cloned!");
        return null;
      }

      // Put the filter "skewed column = skewed keys" in the original tree
      // and "skewed columns != skewed keys" in the cloned tree
      insertSkewFilter(tableScanOpsForJoin, skewedValues, true);
      insertSkewFilter(tableScanCloneOpsForJoin, skewedValues, false);

      // Update the topOps appropriately
      Map<String, Operator<? extends OperatorDesc>> topOps = getTopOps(joinOpClone);
      Map<String, TableScanOperator> origTopOps = parseContext.getTopOps();
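      // Each cloned table scan must be registered under a fresh alias so that it can
      // coexist with the original scan in topOps; the loop below probes for an unused
      // "subqueryN:alias" name before wiring the clone into the plan.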
      for (Entry<String, Operator<? extends OperatorDesc>> topOp : topOps.entrySet()) {
        TableScanOperator tso = (TableScanOperator) topOp.getValue();
        String tabAlias = tso.getConf().getAlias();
        int initCnt = 1;
        String newAlias = "subquery" + initCnt + ":" + tabAlias;
        while (origTopOps.containsKey(newAlias)) {
          initCnt++;
          newAlias = "subquery" + initCnt + ":" + tabAlias;
        }

        parseContext.getTopOps().put(newAlias, tso);
        setUpAlias(joinOp, joinOpClone, tabAlias, newAlias, tso);
      }

      // Now do a union of the select operators: selectOp and selectOpClone.
      // Store the operator that follows the select after the join; we will be
      // adding it as a child to the union later
      List<Operator<? extends OperatorDesc>> finalOps = currOp.getChildOperators();
      currOp.setChildOperators(null);
      currOpClone.setChildOperators(null);

      // Make the union operator
      List<Operator<? extends OperatorDesc>> oplist =
          new ArrayList<Operator<? extends OperatorDesc>>();
      oplist.add(currOp);
      oplist.add(currOpClone);
      Operator<? extends OperatorDesc> unionOp = OperatorFactory.getAndMakeChild(
          currOp.getCompilationOpContext(), new UnionDesc(),
          new RowSchema(currOp.getSchema().getSignature()), oplist);

      // Introduce a select after the union
      List<Operator<? extends OperatorDesc>> unionList =
          new ArrayList<Operator<? extends OperatorDesc>>();
      unionList.add(unionOp);
      Operator<? extends OperatorDesc> selectUnionOp = OperatorFactory.getAndMakeChild(
          currOp.getCompilationOpContext(), new SelectDesc(true),
          new RowSchema(unionOp.getSchema().getSignature()), unionList);

      // Add the final operators after the union
      selectUnionOp.setChildOperators(finalOps);
      // Replace the original select in the parents with selectUnionOp
      for (Operator<? extends OperatorDesc> finalOp : finalOps) {
        finalOp.replaceParent(currOp, selectUnionOp);
      }
      return null;
    }

    /*
     * Get the list of table scan operators for this join. An interface,
     * supportSkewJoinOptimization, has been provided; currently it is only
     * enabled for simple filters and selects.
     */
    private boolean getTableScanOpsForJoin(
        JoinOperator op, List<TableScanOperator> tsOps) {
      for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) {
        if (!getTableScanOps(parent, tsOps)) {
          return false;
        }
      }
      return true;
    }

    private boolean getTableScanOps(
        Operator<? extends OperatorDesc> op, List<TableScanOperator> tsOps) {
      for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) {
        if (!parent.supportSkewJoinOptimization()) {
          return false;
        }
        if (parent instanceof TableScanOperator) {
          tsOps.add((TableScanOperator) parent);
        } else if (!getTableScanOps(parent, tsOps)) {
          return false;
        }
      }
      return true;
    }
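    // For reference, the skew metadata consulted by getSkewedValues() comes from
    // Hive's SKEWED BY DDL; e.g. a (hypothetical) table declared as
    //   CREATE TABLE t1 (c1 STRING, c4 STRING)
    //     SKEWED BY (c1, c4) ON (('1','2'), ('3','4'));
    // surfaces through Table.getSkewedColNames() and Table.getSkewedColValues().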
    /**
     * Returns the skewed values in all the tables which are going to be scanned.
     * If the join is on columns c1, c2 and c3 of tables T1 and T2, where
     * T1 is skewed on c1 and c4 with the skew values ((1,2),(3,4)),
     * whereas T2 is skewed on c1, c2 with skew values ((5,6),(7,8)), the resulting
     * map would be: <(c1) -> ((1), (3)), (c1,c2) -> ((5,6),(7,8))>
     * @param op the join operator being optimized
     * @param tableScanOpsForJoin table scan operators which are parents of the join operator
     * @return map of <join keys that intersect the skewed keys, list of skewed values>
     * @throws SemanticException
     */
    private Map<List<ExprNodeDesc>, List<List<String>>> getSkewedValues(
        Operator<? extends OperatorDesc> op, List<TableScanOperator> tableScanOpsForJoin)
        throws SemanticException {

      Map<List<ExprNodeDesc>, List<List<String>>> skewDataReturn =
          new HashMap<List<ExprNodeDesc>, List<List<String>>>();

      Map<List<ExprNodeDescEqualityWrapper>, List<List<String>>> skewData =
          new HashMap<List<ExprNodeDescEqualityWrapper>, List<List<String>>>();

      // The join keys are available in the reduce sink operators before the join
      for (Operator<? extends OperatorDesc> reduceSinkOp : op.getParentOperators()) {
        ReduceSinkDesc rsDesc = ((ReduceSinkOperator) reduceSinkOp).getConf();

        if (rsDesc.getKeyCols() != null) {
          TableScanOperator tableScanOp = null;
          Table table = null;

          // Find the skew information corresponding to the table
          List<String> skewedColumns = null;
          List<List<String>> skewedValueList = null;

          // The join columns which are also skewed
          List<ExprNodeDescEqualityWrapper> joinKeysSkewedCols =
              new ArrayList<ExprNodeDescEqualityWrapper>();

          // Skewed keys which intersect with join keys
          List<Integer> positionSkewedKeys = new ArrayList<Integer>();

          // Update the join keys appropriately.
          for (ExprNodeDesc keyColDesc : rsDesc.getKeyCols()) {
            ExprNodeColumnDesc keyCol = null;

            // If the key is not a column, then don't apply this optimization.
            // This will be fixed as part of https://issues.apache.org/jira/browse/HIVE-3445
            // for type conversion UDFs.
            if (keyColDesc instanceof ExprNodeColumnDesc) {
              keyCol = (ExprNodeColumnDesc) keyColDesc;
              if (table == null) {
                tableScanOp = getTableScanOperator(parseContext, reduceSinkOp, tableScanOpsForJoin);
                table = tableScanOp == null ? null : tableScanOp.getConf().getTableMetadata();
                skewedColumns = table == null ? null : table.getSkewedColNames();
                // No skew on the table to take care of
                if ((skewedColumns == null) || (skewedColumns.isEmpty())) {
                  continue;
                }
                skewedValueList = table == null ? null : table.getSkewedColValues();
              }
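              // backtrack() maps the reduce sink key column to the column as it is
              // produced by the table scan, so it can be matched by name against the
              // table's skewed column list.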
              ExprNodeDesc keyColOrigin =
                  ExprNodeDescUtils.backtrack(keyCol, reduceSinkOp, tableScanOp);
              int pos = !(keyColOrigin instanceof ExprNodeColumnDesc) ? -1 :
                  skewedColumns.indexOf(((ExprNodeColumnDesc) keyColOrigin).getColumn());
              if ((pos >= 0) && (!positionSkewedKeys.contains(pos))) {
                positionSkewedKeys.add(pos);
                ExprNodeColumnDesc keyColClone = (ExprNodeColumnDesc) keyColOrigin.clone();
                keyColClone.setTabAlias(null);
                joinKeysSkewedCols.add(new ExprNodeDescEqualityWrapper(keyColClone));
              }
            }
          }

          // If some skewed keys are also join keys, add them to the list
          if ((skewedColumns != null) && (!skewedColumns.isEmpty())) {
            if (!joinKeysSkewedCols.isEmpty()) {
              // If the join keys match the skewed keys exactly, use the table's
              // skewed values as-is; otherwise project out the matching positions
              List<List<String>> skewedJoinValues;
              if (skewedColumns.size() == positionSkewedKeys.size()) {
                skewedJoinValues = skewedValueList;
              } else {
                skewedJoinValues = getSkewedJoinValues(skewedValueList, positionSkewedKeys);
              }

              List<List<String>> oldSkewedJoinValues = skewData.get(joinKeysSkewedCols);
              if (oldSkewedJoinValues == null) {
                oldSkewedJoinValues = new ArrayList<List<String>>();
              }
              for (List<String> skewValue : skewedJoinValues) {
                if (!oldSkewedJoinValues.contains(skewValue)) {
                  oldSkewedJoinValues.add(skewValue);
                }
              }

              skewData.put(joinKeysSkewedCols, oldSkewedJoinValues);
            }
          }
        }
      }

      // Convert skewData so that the keys contain ExprNodeDesc
      for (Map.Entry<List<ExprNodeDescEqualityWrapper>, List<List<String>>> mapEntry :
          skewData.entrySet()) {
        List<ExprNodeDesc> skewedKeyJoinCols = new ArrayList<ExprNodeDesc>();
        for (ExprNodeDescEqualityWrapper key : mapEntry.getKey()) {
          skewedKeyJoinCols.add(key.getExprNodeDesc());
        }
        skewDataReturn.put(skewedKeyJoinCols, mapEntry.getValue());
      }

      return skewDataReturn;
    }

    /**
     * Get the table scan that feeds this operator, if any.
     */
    private TableScanOperator getTableScanOperator(
        ParseContext parseContext,
        Operator<? extends OperatorDesc> op,
        List<TableScanOperator> tableScanOpsForJoin) {
      while (true) {
        if (op instanceof TableScanOperator) {
          TableScanOperator tsOp = (TableScanOperator) op;
          if (tableScanOpsForJoin.contains(tsOp)) {
            return tsOp;
          }
        }
        if ((op.getParentOperators() == null) || (op.getParentOperators().isEmpty()) ||
            (op.getParentOperators().size() > 1)) {
          return null;
        }
        op = op.getParentOperators().get(0);
      }
    }

    /*
     * Get the skewed key values that are part of the join key.
     * If skewedValueList contains ((1,2,3),(4,5,6)) and the caller asks for
     * positions (0,2), the result is ((1,3),(4,6)).
     * @param skewedValueList list of all the skewed values
     * @param positionSkewedKeys the requested positions
     * @return sub-list of skewed values with the positions present
     */
    private List<List<String>> getSkewedJoinValues(
        List<List<String>> skewedValueList, List<Integer> positionSkewedKeys) {
      List<List<String>> skewedJoinValues = new ArrayList<List<String>>();
      for (List<String> skewedValuesAllColumns : skewedValueList) {
        List<String> skewedValuesSpecifiedColumns = new ArrayList<String>();
        for (int pos : positionSkewedKeys) {
          skewedValuesSpecifiedColumns.add(skewedValuesAllColumns.get(pos));
        }
        skewedJoinValues.add(skewedValuesSpecifiedColumns);
      }
      return skewedJoinValues;
    }
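    // The filter expression is built once per insertSkewFilter() call and shared by
    // all table scans feeding the join: the original branch keeps only the skewed
    // rows (skewed == true) while the cloned branch keeps the complement via a NOT
    // (skewed == false), so the two branches see disjoint slices of the input.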
    /**
     * Inserts a filter comparing the join keys with the skewed keys. If the table
     * is skewed with values (k1, v1) and (k2, v2) on columns (key, value), then the
     * filter ((key=k1 AND value=v1) OR (key=k2 AND value=v2)) is inserted. If @skewed
     * is false, a NOT is inserted on top of it.
     * @param tableScanOpsForJoin table scans for which the filter will be inserted
     * @param skewedValuesList the map of <expressions, list of skewed values>
     * @param skewed true if we want skewedCol = skewedValue, false if we want
     *               not (skewedCol = skewedValue)
     */
    private void insertSkewFilter(
        List<TableScanOperator> tableScanOpsForJoin,
        Map<List<ExprNodeDesc>, List<List<String>>> skewedValuesList,
        boolean skewed) {
      ExprNodeDesc filterExpr = constructFilterExpr(skewedValuesList, skewed);
      for (TableScanOperator tableScanOp : tableScanOpsForJoin) {
        insertFilterOnTop(tableScanOp, filterExpr);
      }
    }

    /**
     * Inserts a filter below the table scan operator, constructed from the filter
     * expression provided.
     * @param tableScanOp the table scan operator
     * @param filterExpr the filter expression
     */
    private void insertFilterOnTop(
        TableScanOperator tableScanOp, ExprNodeDesc filterExpr) {

      // Get the top operator and its child; all operators have a single parent
      Operator<? extends OperatorDesc> currChild = tableScanOp.getChildOperators().get(0);

      // Create the filter operator and update the parents and children appropriately
      tableScanOp.setChildOperators(null);
      currChild.setParentOperators(null);

      Operator<FilterDesc> filter = OperatorFactory.getAndMakeChild(
          new FilterDesc(filterExpr, false),
          new RowSchema(tableScanOp.getSchema().getSignature()), tableScanOp);
      OperatorFactory.makeChild(filter, currChild);
    }
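    // The expression built below is a left-deep tree: the equality predicates for
    // one skew tuple are chained with AND, and the per-tuple conjunctions are then
    // chained with OR, conceptually
    //   ((k1=1 OR k1=2) OR (k1=2 AND k3=3)) OR (k1=4 AND k3=5)
    // with map iteration order determining the operand order.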
    /**
     * Constructs the filter expression from the skewed keys and skewed values.
     * If the skewed join keys are (k1) and (k1,k3) with the skewed values
     * (1,2) and ((2,3),(4,5)) respectively, the filter expression would be:
     * (k1=1) or (k1=2) or ((k1=2) and (k3=3)) or ((k1=4) and (k3=5)).
     */
    private ExprNodeDesc constructFilterExpr(
        Map<List<ExprNodeDesc>, List<List<String>>> skewedValuesMap,
        boolean skewed) {

      ExprNodeDesc finalExprNodeDesc = null;
      try {
        for (Map.Entry<List<ExprNodeDesc>, List<List<String>>> mapEntry :
            skewedValuesMap.entrySet()) {
          List<ExprNodeDesc> keyCols = mapEntry.getKey();
          List<List<String>> skewedValuesList = mapEntry.getValue();

          for (List<String> skewedValues : skewedValuesList) {
            int keyPos = 0;
            ExprNodeDesc currExprNodeDesc = null;

            // Make the following condition: the values match for all the columns
            for (String skewedValue : skewedValues) {
              List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();

              // We have ensured that the keys are columns
              ExprNodeColumnDesc keyCol = (ExprNodeColumnDesc) keyCols.get(keyPos).clone();
              keyPos++;
              children.add(keyCol);

              // Convert the constants available as strings to the corresponding objects
              children.add(createConstDesc(skewedValue, keyCol));

              // Create the equality condition
              ExprNodeGenericFuncDesc expr =
                  ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), children);
              if (currExprNodeDesc == null) {
                currExprNodeDesc = expr;
              } else {
                // If there are previous nodes, AND the current node with the previous one
                List<ExprNodeDesc> childrenAND = new ArrayList<ExprNodeDesc>();
                childrenAND.add(currExprNodeDesc);
                childrenAND.add(expr);
                currExprNodeDesc =
                    ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPAnd(), childrenAND);
              }
            }

            // If there is more than one skewed value,
            // OR the current node with the previous one
            if (finalExprNodeDesc == null) {
              finalExprNodeDesc = currExprNodeDesc;
            } else {
              List<ExprNodeDesc> childrenOR = new ArrayList<ExprNodeDesc>();
              childrenOR.add(finalExprNodeDesc);
              childrenOR.add(currExprNodeDesc);
              finalExprNodeDesc =
                  ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPOr(), childrenOR);
            }
          }
        }

        // Add a NOT operator on top (this is for the cloned operator, because we
        // want the values which are not skewed)
        if (!skewed) {
          List<ExprNodeDesc> childrenNOT = new ArrayList<ExprNodeDesc>();
          childrenNOT.add(finalExprNodeDesc);
          finalExprNodeDesc =
              ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), childrenNOT);
        }
      } catch (UDFArgumentException e) {
        // Ignore the exception because we are not comparing Long vs. String here.
        // There should never be an exception
        assert false;
      }
      return finalExprNodeDesc;
    }

    /**
     * Converts the skewedValue available as a string in the metadata to the appropriate
     * object by using the type of the column from the join key.
     * @param skewedValue the skewed value as a string
     * @param keyCol the join key column carrying the target type
     * @return an expression node descriptor of the appropriate constant
     */
    private ExprNodeConstantDesc createConstDesc(
        String skewedValue, ExprNodeColumnDesc keyCol) {
      ObjectInspector inputOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
          TypeInfoFactory.stringTypeInfo);
      ObjectInspector outputOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
          keyCol.getTypeInfo());
      Converter converter = ObjectInspectorConverters.getConverter(inputOI, outputOI);
      Object skewedValueObject = converter.convert(skewedValue);
      return new ExprNodeConstantDesc(keyCol.getTypeInfo(), skewedValueObject);
    }

    private Map<String, Operator<? extends OperatorDesc>> getTopOps(
        Operator<? extends OperatorDesc> op) {
      // Must be a deterministic-order map for consistent q-test output across
      // Java versions
      Map<String, Operator<? extends OperatorDesc>> topOps =
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
      if (op.getParentOperators() == null || op.getParentOperators().size() == 0) {
        topOps.put(((TableScanOperator) op).getConf().getAlias(), op);
      } else {
        for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) {
          if (parent != null) {
            topOps.putAll(getTopOps(parent));
          }
        }
      }
      return topOps;
    }

    private void insertRowResolvers(
        Operator<? extends OperatorDesc> op,
        Operator<? extends OperatorDesc> opClone,
        SkewJoinOptProcCtx ctx) {

      if (op instanceof TableScanOperator) {
        ctx.getCloneTSOpMap().put((TableScanOperator) opClone, (TableScanOperator) op);
      }

      List<Operator<? extends OperatorDesc>> parents = op.getParentOperators();
      List<Operator<? extends OperatorDesc>> parentClones = opClone.getParentOperators();
      if ((parents != null) && (!parents.isEmpty()) &&
          (parentClones != null) && (!parentClones.isEmpty())) {
        for (int pos = 0; pos < parents.size(); pos++) {
          insertRowResolvers(parents.get(pos), parentClones.get(pos), ctx);
        }
      }
    }
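    // The cloned join tree still refers to the original table aliases; the helpers
    // below rewrite every occurrence (aliasToOpInfo, left/right aliases, baseSrc,
    // map/stream aliases) so the clone is self-consistent under its new
    // "subqueryN:alias" names.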
    /**
     * Set alias in the cloned join tree.
     */
    private static void setUpAlias(JoinOperator origin, JoinOperator cloned,
        String origAlias, String newAlias, Operator<? extends OperatorDesc> topOp) {
      cloned.getConf().getAliasToOpInfo().remove(origAlias);
      cloned.getConf().getAliasToOpInfo().put(newAlias, topOp);
      if (origin.getConf().getLeftAlias().equals(origAlias)) {
        cloned.getConf().setLeftAlias(newAlias);
      }
      replaceAlias(origin.getConf().getLeftAliases(), cloned.getConf().getLeftAliases(),
          origAlias, newAlias);
      replaceAlias(origin.getConf().getRightAliases(), cloned.getConf().getRightAliases(),
          origAlias, newAlias);
      replaceAlias(origin.getConf().getBaseSrc(), cloned.getConf().getBaseSrc(),
          origAlias, newAlias);
      replaceAlias(origin.getConf().getMapAliases(), cloned.getConf().getMapAliases(),
          origAlias, newAlias);
      replaceAlias(origin.getConf().getStreamAliases(), cloned.getConf().getStreamAliases(),
          origAlias, newAlias);
    }

    private static void replaceAlias(String[] origin, String[] cloned,
        String alias, String newAlias) {
      if (origin == null || cloned == null || origin.length != cloned.length) {
        return;
      }
      for (int i = 0; i < origin.length; i++) {
        if (origin[i].equals(alias)) {
          cloned[i] = newAlias;
        }
      }
    }

    private static void replaceAlias(List<String> origin, List<String> cloned,
        String alias, String newAlias) {
      if (origin == null || cloned == null || origin.size() != cloned.size()) {
        return;
      }
      for (int i = 0; i < origin.size(); i++) {
        if (origin.get(i).equals(alias)) {
          cloned.set(i, newAlias);
        }
      }
    }
  }

  /* (non-Javadoc)
   * @see org.apache.hadoop.hive.ql.optimizer.Transform#transform
   * (org.apache.hadoop.hive.ql.parse.ParseContext)
   */
  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", "TS%.*RS%JOIN%"), getSkewJoinProc(pctx));

    SkewJoinOptProcCtx skewJoinOptProcCtx = new SkewJoinOptProcCtx(pctx);

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, skewJoinOptProcCtx);
    GraphWalker ogw = new DefaultGraphWalker(disp);

    // Create a list of topOp nodes
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    return pctx;
  }

  private NodeProcessor getSkewJoinProc(ParseContext parseContext) {
    return new SkewJoinProc(parseContext);
  }

  /**
   * SkewJoinOptProcCtx.
   *
   */
  public static class SkewJoinOptProcCtx implements NodeProcessorCtx {

    private ParseContext pGraphContext;

    // Set of joins already processed
    private Set<JoinOperator> doneJoins;
    private Map<TableScanOperator, TableScanOperator> cloneTSOpMap;

    public SkewJoinOptProcCtx(ParseContext pctx) {
      this.pGraphContext = pctx;
      doneJoins = new HashSet<JoinOperator>();
      cloneTSOpMap = new HashMap<TableScanOperator, TableScanOperator>();
    }

    public ParseContext getpGraphContext() {
      return pGraphContext;
    }

    public void setPGraphContext(ParseContext graphContext) {
      pGraphContext = graphContext;
    }

    public Set<JoinOperator> getDoneJoins() {
      return doneJoins;
    }

    public void setDoneJoins(Set<JoinOperator> doneJoins) {
      this.doneJoins = doneJoins;
    }

    public Map<TableScanOperator, TableScanOperator> getCloneTSOpMap() {
      return cloneTSOpMap;
    }

    public void setCloneTSOpMap(Map<TableScanOperator, TableScanOperator> cloneTSOpMap) {
      this.cloneTSOpMap = cloneTSOpMap;
    }
  }
}
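// Usage note: this compile-time skew join optimization is typically enabled with
//   set hive.optimize.skewjoin.compiletime=true;
// (the runtime skew join handling, hive.optimize.skewjoin, is a separate mechanism).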