/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.See the NOTICE file * distributed with this work for additional information * regarding copyright ownership.The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License.You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.optimizer; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.Stack; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.QB; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.TableAccessAnalyzer; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; import org.apache.hadoop.util.ReflectionUtils; //try to replace a bucket map join with a sorted merge map join abstract public class AbstractSMBJoinProc extends AbstractBucketJoinProc implements NodeProcessor { public AbstractSMBJoinProc(ParseContext pctx) { super(pctx); } public AbstractSMBJoinProc() { super(); } @Override abstract public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException; /* * Return true or false based on whether a bucketed mapjoin can be converted successfully to * a sort-merge map join operator. The following checks are performed: * a. The mapjoin under consideration is a bucketed mapjoin. * b. All the tables are sorted in same order, such that join columns is equal to or a prefix * of the sort columns. */ protected boolean canConvertBucketMapJoinToSMBJoin(MapJoinOperator mapJoinOp, Stack<Node> stack, SortBucketJoinProcCtx smbJoinContext, Object... nodeOutputs) throws SemanticException { // Check whether the mapjoin is a bucketed mapjoin. // The above can be ascertained by checking the big table bucket -> small table buckets // mapping in the mapjoin descriptor. // First check if this map-join operator is already a BucketMapJoin or not. If not give up // we are only trying to convert a BucketMapJoin to sort-BucketMapJoin. if (mapJoinOp.getConf().getAliasBucketFileNameMapping() == null || mapJoinOp.getConf().getAliasBucketFileNameMapping().size() == 0) { return false; } if (!this.pGraphContext.getMapJoinOps().contains(mapJoinOp)) { return false; } String[] srcs = mapJoinOp.getConf().getBaseSrc(); for (int srcPos = 0; srcPos < srcs.length; srcPos++) { srcs[srcPos] = QB.getAppendedAliasFromId(mapJoinOp.getConf().getId(), srcs[srcPos]); } boolean tableEligibleForBucketedSortMergeJoin = true; // All the tables/partitions columns should be sorted in the same order // For example, if tables A and B are being joined on columns c1, c2 and c3 // which are the sorted and bucketed columns. The join would work, as long // c1, c2 and c3 are sorted in the same order. List<Order> sortColumnsFirstTable = new ArrayList<Order>(); for (int pos = 0; pos < srcs.length; pos++) { tableEligibleForBucketedSortMergeJoin = tableEligibleForBucketedSortMergeJoin && isEligibleForBucketSortMergeJoin(smbJoinContext, mapJoinOp.getConf().getKeys().get((byte) pos), mapJoinOp.getConf().getAliasToOpInfo(), srcs, pos, sortColumnsFirstTable); } if (!tableEligibleForBucketedSortMergeJoin) { // this is a mapjoin but not suited for a sort merge bucket map join. check outer joins if (MapJoinProcessor.checkMapJoin(mapJoinOp.getConf().getPosBigTable(), mapJoinOp.getConf().getConds()) < 0) { throw new SemanticException( ErrorMsg.INVALID_BIGTABLE_MAPJOIN.format(mapJoinOp.getConf().getBigTableAlias())); } return false; } smbJoinContext.setSrcs(srcs); return true; } // Convert the bucket map-join operator to a sort-merge map join operator protected SMBMapJoinOperator convertBucketMapJoinToSMBJoin(MapJoinOperator mapJoinOp, SortBucketJoinProcCtx smbJoinContext) { String[] srcs = smbJoinContext.getSrcs(); SMBMapJoinOperator smbJop = new SMBMapJoinOperator(mapJoinOp); SMBJoinDesc smbJoinDesc = new SMBJoinDesc(mapJoinOp.getConf()); smbJop.setConf(smbJoinDesc); HashMap<Byte, String> tagToAlias = new HashMap<Byte, String>(); for (int i = 0; i < srcs.length; i++) { tagToAlias.put((byte) i, srcs[i]); } smbJoinDesc.setTagToAlias(tagToAlias); int indexInListMapJoinNoReducer = this.pGraphContext.getListMapJoinOpsNoReducer().indexOf(mapJoinOp); if (indexInListMapJoinNoReducer >= 0 ) { this.pGraphContext.getListMapJoinOpsNoReducer().remove(indexInListMapJoinNoReducer); this.pGraphContext.getListMapJoinOpsNoReducer().add(indexInListMapJoinNoReducer, smbJop); } Map<String, DummyStoreOperator> aliasToSink = new HashMap<String, DummyStoreOperator>(); // For all parents (other than the big table), insert a dummy store operator /* Consider a query like: * * select * from * (subq1 --> has a filter) * join * (subq2 --> has a filter) * on some key * * Let us assume that subq1 is the small table (either specified by the user or inferred * automatically). The following operator tree will be created: * * TableScan (subq1) --> Select --> Filter --> DummyStore * \ * \ SMBJoin * / * / * TableScan (subq2) --> Select --> Filter */ List<Operator<? extends OperatorDesc>> parentOperators = mapJoinOp.getParentOperators(); for (int i = 0; i < parentOperators.size(); i++) { Operator<? extends OperatorDesc> par = parentOperators.get(i); int index = par.getChildOperators().indexOf(mapJoinOp); par.getChildOperators().remove(index); if (i == smbJoinDesc.getPosBigTable()) { par.getChildOperators().add(index, smbJop); } else { DummyStoreOperator dummyStoreOp = new DummyStoreOperator(par.getCompilationOpContext()); par.getChildOperators().add(index, dummyStoreOp); List<Operator<? extends OperatorDesc>> childrenOps = new ArrayList<Operator<? extends OperatorDesc>>(); childrenOps.add(smbJop); dummyStoreOp.setChildOperators(childrenOps); List<Operator<? extends OperatorDesc>> parentOps = new ArrayList<Operator<? extends OperatorDesc>>(); parentOps.add(par); dummyStoreOp.setParentOperators(parentOps); aliasToSink.put(srcs[i], dummyStoreOp); smbJop.getParentOperators().remove(i); smbJop.getParentOperators().add(i, dummyStoreOp); } } smbJoinDesc.setAliasToSink(aliasToSink); List<Operator<? extends OperatorDesc>> childOps = mapJoinOp.getChildOperators(); for (int i = 0; i < childOps.size(); i++) { Operator<? extends OperatorDesc> child = childOps.get(i); int index = child.getParentOperators().indexOf(mapJoinOp); child.getParentOperators().remove(index); child.getParentOperators().add(index, smbJop); } // Data structures coming from QBJoinTree smbJop.getConf().setQBJoinTreeProps(mapJoinOp.getConf()); // pGraphContext.getSmbMapJoinOps().add(smbJop); pGraphContext.getMapJoinOps().remove(mapJoinOp); return smbJop; } /** * Whether this table is eligible for a sort-merge join. * * @param pctx parse context * @param op map join operator being considered * @param joinTree join tree being considered * @param alias table alias in the join tree being checked * @param pos position of the table * @param sortColumnsFirstTable The names and order of the sorted columns for the first table. * It is not initialized when pos = 0. * @return * @throws SemanticException */ private boolean isEligibleForBucketSortMergeJoin( SortBucketJoinProcCtx smbJoinContext, List<ExprNodeDesc> keys, Map<String, Operator<? extends OperatorDesc>> aliasToOpInfo, String[] aliases, int pos, List<Order> sortColumnsFirstTable) throws SemanticException { String alias = aliases[pos]; /* * Consider a query like: * * select -- mapjoin(subq1) -- * from * (select a.key, a.value from tbl1 a) subq1 * join * (select a.key, a.value from tbl2 a) subq2 * on subq1.key = subq2.key; * * aliasToOpInfo contains the SelectOperator for subq1 and subq2. * We need to traverse the tree (using TableAccessAnalyzer) to get to the base * table. If the object being map-joined is a base table, then aliasToOpInfo * contains the TableScanOperator, and TableAccessAnalyzer is a no-op. */ Operator<? extends OperatorDesc> topOp = aliasToOpInfo.get(alias); if (topOp == null) { return false; } // get all join columns from join keys List<String> joinCols = toColumns(keys); if (joinCols == null || joinCols.isEmpty()) { return false; } TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(topOp, joinCols); if (tso == null) { return false; } // For nested sub-queries, the alias mapping is not maintained in QB currently. /* * Consider a query like: * * select count(*) from * ( * select key, count(*) from * ( * select --mapjoin(a)-- a.key as key, a.value as val1, b.value as val2 * from tbl1 a join tbl2 b on a.key = b.key * ) subq1 * group by key * ) subq2; * * The table alias should be subq2:subq1:a which needs to be fetched from topOps. */ if (pGraphContext.getTopOps().containsValue(tso)) { for (Map.Entry<String, TableScanOperator> topOpEntry : this.pGraphContext.getTopOps().entrySet()) { if (topOpEntry.getValue() == tso) { alias = topOpEntry.getKey(); aliases[pos] = alias; break; } } } else { // Ideally, this should never happen, and this should be an assert. return false; } Table tbl = tso.getConf().getTableMetadata(); if (tbl.isPartitioned()) { PrunedPartitionList prunedParts = pGraphContext.getPrunedPartitions(alias, tso); List<Partition> partitions = prunedParts.getNotDeniedPartns(); // Populate the names and order of columns for the first partition of the // first table if ((pos == 0) && (partitions != null) && (!partitions.isEmpty())) { Partition firstPartition = partitions.get(0); sortColumnsFirstTable.addAll(firstPartition.getSortCols()); } for (Partition partition : prunedParts.getNotDeniedPartns()) { if (!checkSortColsAndJoinCols(partition.getSortCols(), joinCols, sortColumnsFirstTable)) { return false; } } return true; } // Populate the names and order of columns for the first table if (pos == 0) { sortColumnsFirstTable.addAll(tbl.getSortCols()); } return checkSortColsAndJoinCols(tbl.getSortCols(), joinCols, sortColumnsFirstTable); } private boolean checkSortColsAndJoinCols(List<Order> sortCols, List<String> joinCols, List<Order> sortColumnsFirstPartition) { if (sortCols == null || sortCols.size() < joinCols.size()) { return false; } // A join is eligible for a sort-merge join, only if it is eligible for // a bucketized map join. So, we dont need to check for bucketized map // join here. We are guaranteed that the join keys contain all the // bucketized keys (note that the order need not be the same). List<String> sortColNames = new ArrayList<String>(); // The join columns should contain all the sort columns // The sort columns of all the tables should be in the same order // compare the column names and the order with the first table/partition. for (int pos = 0; pos < sortCols.size(); pos++) { Order o = sortCols.get(pos); if (pos < sortColumnsFirstPartition.size()) { if (o.getOrder() != sortColumnsFirstPartition.get(pos).getOrder()) { return false; } } sortColNames.add(o.getCol()); } // The column names and order (ascending/descending) matched // The first 'n' sorted columns should be the same as the joinCols, where // 'n' is the size of join columns. // For eg: if the table is sorted by (a,b,c), it is OK to convert if the join is // on (a), (a,b), or any combination of (a,b,c): // (a,b,c), (a,c,b), (c,a,b), (c,b,a), (b,c,a), (b,a,c) // but it is not OK to convert if the join is on (a,c) return sortColNames.subList(0, joinCols.size()).containsAll(joinCols); } // Can the join operator be converted to a sort-merge join operator ? // It is already verified that the join can be converted to a bucket map join protected boolean checkConvertJoinToSMBJoin( JoinOperator joinOperator, SortBucketJoinProcCtx smbJoinContext) throws SemanticException { if (!this.pGraphContext.getJoinOps().contains(joinOperator)) { return false; } String[] srcs = joinOperator.getConf().getBaseSrc(); // All the tables/partitions columns should be sorted in the same order // For example, if tables A and B are being joined on columns c1, c2 and c3 // which are the sorted and bucketed columns. The join would work, as long // c1, c2 and c3 are sorted in the same order. List<Order> sortColumnsFirstTable = new ArrayList<Order>(); for (int pos = 0; pos < srcs.length; pos++) { if (!isEligibleForBucketSortMergeJoin(smbJoinContext, smbJoinContext.getKeyExprMap().get((byte) pos), joinOperator.getConf().getAliasToOpInfo(), srcs, pos, sortColumnsFirstTable)) { return false; } } smbJoinContext.setSrcs(srcs); return true; } // Can the join operator be converted to a sort-merge join operator ? protected boolean canConvertJoinToSMBJoin( JoinOperator joinOperator, SortBucketJoinProcCtx smbJoinContext) throws SemanticException { boolean canConvert = canConvertJoinToBucketMapJoin( joinOperator, smbJoinContext ); if (!canConvert) { return false; } return checkConvertJoinToSMBJoin(joinOperator, smbJoinContext); } // Can the join operator be converted to a bucket map-merge join operator ? @SuppressWarnings("unchecked") protected boolean canConvertJoinToBucketMapJoin( JoinOperator joinOp, SortBucketJoinProcCtx context) throws SemanticException { // This has already been inspected and rejected if (context.getRejectedJoinOps().contains(joinOp)) { return false; } if (!this.pGraphContext.getJoinOps().contains(joinOp)) { return false; } Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null; try { String selector = HiveConf.getVar(pGraphContext.getConf(), HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR); bigTableMatcherClass = JavaUtils.loadClass(selector); } catch (ClassNotFoundException e) { throw new SemanticException(e.getMessage()); } BigTableSelectorForAutoSMJ bigTableMatcher = ReflectionUtils.newInstance(bigTableMatcherClass, null); JoinDesc joinDesc = joinOp.getConf(); JoinCondDesc[] joinCondns = joinDesc.getConds(); Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns); if (joinCandidates.isEmpty()) { // This is a full outer join. This can never be a map-join // of any type. So return false. return false; } int bigTablePosition = bigTableMatcher.getBigTablePosition(pGraphContext, joinOp, joinCandidates); if (bigTablePosition < 0) { // contains aliases from sub-query return false; } context.setBigTablePosition(bigTablePosition); String joinAlias = bigTablePosition == 0 ? joinOp.getConf().getLeftAlias() : joinOp.getConf().getRightAliases()[bigTablePosition - 1]; joinAlias = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinAlias); Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>(); List<Operator<? extends OperatorDesc>> parentOps = joinOp.getParentOperators(); // get the join keys from parent ReduceSink operators for (Operator<? extends OperatorDesc> parentOp : parentOps) { ReduceSinkDesc rsconf = ((ReduceSinkOperator)parentOp).getConf(); Byte tag = (byte) rsconf.getTag(); List<ExprNodeDesc> keys = rsconf.getKeyCols(); keyExprMap.put(tag, keys); } context.setKeyExprMap(keyExprMap); // Make a deep copy of the aliases so that they are not changed in the context String[] joinSrcs = joinOp.getConf().getBaseSrc(); String[] srcs = new String[joinSrcs.length]; for (int srcPos = 0; srcPos < joinSrcs.length; srcPos++) { joinSrcs[srcPos] = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinSrcs[srcPos]); srcs[srcPos] = new String(joinSrcs[srcPos]); } // Given a candidate map-join, can this join be converted. // The candidate map-join was derived from the pluggable sort merge join big // table matcher. return checkConvertBucketMapJoin( context, joinOp.getConf().getAliasToOpInfo(), keyExprMap, joinAlias, Arrays.asList(srcs)); } // Convert the join operator to a bucket map-join join operator protected MapJoinOperator convertJoinToBucketMapJoin( JoinOperator joinOp, SortBucketJoinProcCtx joinContext) throws SemanticException { MapJoinOperator mapJoinOp = new MapJoinProcessor().convertMapJoin( pGraphContext.getConf(), joinOp, joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(), joinOp.getConf().getMapAliases(), joinContext.getBigTablePosition(), false, false); // Remove the join operator from the query join context // Data structures coming from QBJoinTree mapJoinOp.getConf().setQBJoinTreeProps(joinOp.getConf()); // pGraphContext.getMapJoinOps().add(mapJoinOp); pGraphContext.getJoinOps().remove(joinOp); convertMapJoinToBucketMapJoin(mapJoinOp, joinContext); return mapJoinOp; } // Convert the join operator to a sort-merge join operator protected void convertJoinToSMBJoin( JoinOperator joinOp, SortBucketJoinProcCtx smbJoinContext) throws SemanticException { MapJoinOperator mapJoinOp = convertJoinToBucketMapJoin(joinOp, smbJoinContext); SMBMapJoinOperator smbMapJoinOp = convertBucketMapJoinToSMBJoin(mapJoinOp, smbJoinContext); smbMapJoinOp.setConvertedAutomaticallySMBJoin(true); } }