/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer;

import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

/**
 * Implementation of rule-based join table reordering optimization. The user
 * passes hints to specify which tables are to be streamed, and those tables
 * are moved to have the largest tag so that they are processed last. In the
 * future, once statistics are implemented, this transformation can also be
 * done based on costs.
 */
public class JoinReorder extends Transform {

  private final Map<Operator<?>, Integer> cache =
      new IdentityHashMap<Operator<?>, Integer>();

  /**
   * Estimate the size of the output based on the STREAMTABLE hints. To do so
   * the whole tree is traversed. Possible sizes:
   * 0: the operator and its subtree don't contain any big tables;
   * 1: the subtree of the operator contains a big table;
   * 2: the operator is a big table.
   *
   * @param operator
   *          The operator whose output size is to be estimated
   * @param bigTables
   *          Set of tables that should be streamed
   * @return The estimated size - 0 (no streamed tables), 1 (streamed tables in
   *         subtree) or 2 (a streamed table)
   */
  private int getOutputSize(Operator<? extends OperatorDesc> operator,
      Set<String> bigTables) {
    // Memoizing wrapper around getOutputSizeInternal: each operator is
    // evaluated at most once per transform() call.
    if (cache.containsKey(operator)) {
      return cache.get(operator);
    }
    int result = getOutputSizeInternal(operator, bigTables);
    cache.put(operator, result);
    return result;
  }

  private int getOutputSizeInternal(Operator<? extends OperatorDesc> operator,
      Set<String> bigTables) {
    // If a join operator contains a big subtree, there is a chance that its
    // output is also big, so the output size is 1 (medium)
    if (operator instanceof JoinOperator) {
      for (Operator<? extends OperatorDesc> o : operator.getParentOperators()) {
        if (getOutputSize(o, bigTables) != 0) {
          return 1;
        }
      }
    }

    // If a table is in bigTables then its output is big (2)
    if (operator instanceof TableScanOperator) {
      String alias = ((TableScanOperator) operator).getConf().getAlias();
      if (bigTables.contains(alias)) {
        return 2;
      }
    }

    // For all other kinds of operators, assume the output is as big as the
    // biggest output from a parent
    int maxSize = 0;
    if (operator.getParentOperators() != null) {
      for (Operator<? extends OperatorDesc> o : operator.getParentOperators()) {
        // Recurse through the memoized wrapper
        int current = getOutputSize(o, bigTables);
        if (current > maxSize) {
          maxSize = current;
        }
      }
    }

    return maxSize;
  }
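  // Illustrative example (hypothetical query, not part of the original
  // source): given
  //   SELECT /*+ STREAMTABLE(a) */ a.val, b.val
  //   FROM a JOIN b ON (a.key = b.key)
  // the TableScanOperator for "a" has size 2, the one for "b" has size 0,
  // the reduce sink above "a" inherits size 2, and the JoinOperator itself
  // has size 1 because its subtree contains a big table.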
  /**
   * Find all big tables from STREAMTABLE hints.
   *
   * @param joinCtx
   *          The join context
   * @return Set of all big tables
   */
  private Set<String> getBigTables(ParseContext joinCtx) {
    Set<String> bigTables = new HashSet<String>();

    for (JoinOperator joinOp : joinCtx.getJoinOps()) {
      if (joinOp.getConf().getStreamAliases() != null) {
        bigTables.addAll(joinOp.getConf().getStreamAliases());
      }
    }

    return bigTables;
  }

  /**
   * Reorder the tables in a join operator appropriately (by reordering the
   * tags of the reduce sinks).
   *
   * @param joinOp
   *          The join operator to be processed
   * @param bigTables
   *          Set of all big tables
   */
  private void reorder(JoinOperator joinOp, Set<String> bigTables) {
    int count = joinOp.getParentOperators().size();

    // Find the biggest reduce sink
    int biggestPos = count - 1;
    int biggestSize = getOutputSize(
        joinOp.getParentOperators().get(biggestPos), bigTables);
    for (int i = 0; i < count - 1; i++) {
      int currSize = getOutputSize(joinOp.getParentOperators().get(i),
          bigTables);
      if (currSize > biggestSize) {
        biggestSize = currSize;
        biggestPos = i;
      }
    }

    // Reorder tags if need be
    if (biggestPos != (count - 1)) {
      Byte[] tagOrder = joinOp.getConf().getTagOrder();
      Byte temp = tagOrder[biggestPos];
      tagOrder[biggestPos] = tagOrder[count - 1];
      tagOrder[count - 1] = temp;

      // Update tags of reduce sinks
      ((ReduceSinkOperator) joinOp.getParentOperators().get(biggestPos))
          .getConf().setTag(count - 1);
      ((ReduceSinkOperator) joinOp.getParentOperators().get(count - 1))
          .getConf().setTag(biggestPos);
    }
  }

  /**
   * Transform the query tree. For each join, check which reduce sink will
   * output the biggest result (based on STREAMTABLE hints) and give it the
   * biggest tag so that it gets streamed.
   *
   * @param pactx
   *          current parse context
   */
  @Override
  public ParseContext transform(ParseContext pactx) throws SemanticException {
    Set<String> bigTables = getBigTables(pactx);
    cache.clear();
    for (JoinOperator joinOp : pactx.getJoinOps()) {
      reorder(joinOp, bigTables);
    }
    return pactx;
  }
}
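// Illustrative usage (hypothetical driver code, not part of this file): the
// optimizer would invoke this transform on the ParseContext produced by
// semantic analysis, e.g.
//   ParseContext ctx = ...;                  // built during semantic analysis
//   ctx = new JoinReorder().transform(ctx);  // swap tags per STREAMTABLE hints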