/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.optimizer.correlation; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.Stack; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.PTFOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; import org.apache.hadoop.hive.ql.lib.GraphWalker; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor; import org.apache.hadoop.hive.ql.optimizer.Transform; import org.apache.hadoop.hive.ql.optimizer.physical.CommonJoinTaskDispatcher; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; /** * Implementation of Correlation Optimizer. This optimizer is based on * the paper "YSmart: Yet Another SQL-to-MapReduce Translator" * (Rubao Lee, Tian Luo, Yin Huai, Fusheng Wang, Yongqiang He, and Xiaodong Zhang) * (http://www.cse.ohio-state.edu/hpcs/WWW/HTML/publications/papers/TR-11-7.pdf). * Correlation Optimizer detects if ReduceSinkOperators share same keys. * Then, it will transform the query plan tree (operator tree) by exploiting * detected correlations. For details, see the original paper of YSmart. * * Test queries associated with this optimizer are correlationoptimizer1.q to * correlationoptimizer14.q */ public class CorrelationOptimizer extends Transform { private static final Logger LOG = LoggerFactory.getLogger(CorrelationOptimizer.class.getName()); private boolean abort; // if correlation optimizer will not try to optimize this query private ParseContext pCtx; //Join operators which may be converted by CommonJoinResolver; private final Set<Operator<? extends OperatorDesc>> skipedJoinOperators; public CorrelationOptimizer() { super(); pCtx = null; skipedJoinOperators = new HashSet<Operator<? extends OperatorDesc>>(); abort = false; } private void findPossibleAutoConvertedJoinOperators() throws SemanticException { // Guess if CommonJoinResolver will work. If CommonJoinResolver may // convert a join operation, correlation optimizer will not merge that join. // TODO: If hive.auto.convert.join.noconditionaltask=true, for a JoinOperator // that has both intermediate tables and query input tables as input tables, // we should be able to guess if this JoinOperator will be converted to a MapJoin // based on hive.auto.convert.join.noconditionaltask.size. for (JoinOperator joinOp: pCtx.getJoinOps()) { boolean isAbleToGuess = true; boolean mayConvert = false; // Get total size and individual alias's size long aliasTotalKnownInputSize = 0; Map<String, Long> aliasToSize = new HashMap<String, Long>(); Map<Integer, Set<String>> posToAliases = new HashMap<Integer, Set<String>>(); for (int pos = 0; pos < joinOp.getNumParent(); pos++) { Operator<? extends OperatorDesc> op = joinOp.getParentOperators().get(pos); Set<TableScanOperator> topOps = CorrelationUtilities.findTableScanOperators(op); if (topOps.isEmpty()) { isAbleToGuess = false; break; } Set<String> aliases = new LinkedHashSet<String>(); for (TableScanOperator tsop : topOps) { Table table = tsop.getConf().getTableMetadata(); if (table == null) { // table should not be null. throw new SemanticException("The table of " + tsop.getName() + " " + tsop.getIdentifier() + " is null, which is not expected."); } String alias = tsop.getConf().getAlias(); aliases.add(alias); Path p = table.getPath(); ContentSummary resultCs = null; try { FileSystem fs = table.getPath().getFileSystem(pCtx.getConf()); resultCs = fs.getContentSummary(p); } catch (IOException e) { LOG.warn("Encounter a error while querying content summary of table " + table.getCompleteName() + " from FileSystem. " + "Cannot guess if CommonJoinOperator will optimize " + joinOp.getName() + " " + joinOp.getIdentifier()); } if (resultCs == null) { isAbleToGuess = false; break; } long size = resultCs.getLength(); aliasTotalKnownInputSize += size; Long es = aliasToSize.get(alias); if(es == null) { es = new Long(0); } es += size; aliasToSize.put(alias, es); } posToAliases.put(pos, aliases); } if (!isAbleToGuess) { LOG.info("Cannot guess if CommonJoinOperator will optimize " + joinOp.getName() + " " + joinOp.getIdentifier()); continue; } JoinDesc joinDesc = joinOp.getConf(); Byte[] order = joinDesc.getTagOrder(); int numAliases = order.length; Set<Integer> bigTableCandidates = MapJoinProcessor.getBigTableCandidates(joinDesc.getConds()); if (bigTableCandidates.isEmpty()) { continue; } long ThresholdOfSmallTblSizeSum = HiveConf.getLongVar(pCtx.getConf(), HiveConf.ConfVars.HIVESMALLTABLESFILESIZE); for (int i = 0; i < numAliases; i++) { // this table cannot be big table if (!bigTableCandidates.contains(i)) { continue; } Set<String> aliases = posToAliases.get(i); long aliasKnownSize = Utilities.sumOf(aliasToSize, aliases); if (!CommonJoinTaskDispatcher.cannotConvert(aliasKnownSize, aliasTotalKnownInputSize, ThresholdOfSmallTblSizeSum)) { mayConvert = true; } } if (mayConvert) { LOG.info(joinOp.getName() + " " + joinOp.getIdentifier() + " may be converted to MapJoin by CommonJoinResolver"); skipedJoinOperators.add(joinOp); } } } /** * Detect correlations and transform the query tree. * * @param pactx * current parse context * @throws SemanticException */ public ParseContext transform(ParseContext pctx) throws SemanticException { pCtx = pctx; if (HiveConf.getBoolVar(pCtx.getConf(),HiveConf.ConfVars.HIVECONVERTJOIN)) { findPossibleAutoConvertedJoinOperators(); } // detect correlations CorrelationNodeProcCtx corrCtx = new CorrelationNodeProcCtx(pCtx); Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put(new RuleRegExp("R1", ReduceSinkOperator.getOperatorName() + "%"), new CorrelationNodeProc()); Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, corrCtx); GraphWalker ogw = new DefaultGraphWalker(disp); // Create a list of topOp nodes List<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(pCtx.getTopOps().values()); ogw.startWalking(topNodes, null); // We have finished tree walking (correlation detection). // We will first see if we need to abort (the operator tree has not been changed). // If not, we will start to transform the operator tree. abort = corrCtx.isAbort(); if (abort) { LOG.info("Abort. Reasons are ..."); for (String reason : corrCtx.getAbortReasons()) { LOG.info("-- " + reason); } } else { // transform the operator tree LOG.info("Begain query plan transformation based on intra-query correlations. " + corrCtx.getCorrelations().size() + " correlation(s) to be applied"); for (IntraQueryCorrelation correlation : corrCtx.getCorrelations()) { QueryPlanTreeTransformation.applyCorrelation(pCtx, corrCtx, correlation); } } return pCtx; } private class CorrelationNodeProc implements NodeProcessor { private void analyzeReduceSinkOperatorsOfJoinOperator(JoinCondDesc[] joinConds, List<Operator<? extends OperatorDesc>> rsOps, Operator<? extends OperatorDesc> curentRsOp, Set<ReduceSinkOperator> correlatedRsOps) { if (correlatedRsOps.contains((ReduceSinkOperator) curentRsOp)) { return; } correlatedRsOps.add((ReduceSinkOperator) curentRsOp); int pos = rsOps.indexOf(curentRsOp); for (int i = 0; i < joinConds.length; i++) { JoinCondDesc joinCond = joinConds[i]; int type = joinCond.getType(); if (pos == joinCond.getLeft()) { if (type == JoinDesc.INNER_JOIN || type == JoinDesc.LEFT_OUTER_JOIN || type == JoinDesc.LEFT_SEMI_JOIN) { Operator<? extends OperatorDesc> newCurrentRsOps = rsOps.get(joinCond.getRight()); analyzeReduceSinkOperatorsOfJoinOperator(joinConds, rsOps, newCurrentRsOps, correlatedRsOps); } } else if (pos == joinCond.getRight()) { if (type == JoinDesc.INNER_JOIN || type == JoinDesc.RIGHT_OUTER_JOIN) { Operator<? extends OperatorDesc> newCurrentRsOps = rsOps.get(joinCond.getLeft()); analyzeReduceSinkOperatorsOfJoinOperator(joinConds, rsOps, newCurrentRsOps, correlatedRsOps); } } } } private boolean sameKeys(List<ExprNodeDesc> k1, List<ExprNodeDesc> k2) { if (k1.size() != k2.size()) { return false; } for (int i = 0; i < k1.size(); i++) { ExprNodeDesc expr1 = k1.get(i); ExprNodeDesc expr2 = k2.get(i); if (expr1 == null) { if (expr2 == null) { continue; } else { return false; } } else { if (!expr1.isSame(expr2)) { return false; } } } return true; } private boolean sameOrder(String order1, String order2) { if (order1 == null || order1.trim().equals("")) { if (order2 == null || order2.trim().equals("")) { return true; } return false; } if (order2 == null || order2.trim().equals("")) { return false; } order1 = order1.trim(); order2 = order2.trim(); if (!order1.equals(order2)) { return false; } return true; } /** * This method is used to recursively traverse the tree to find * ReduceSinkOperators which share the same key columns and partitioning * columns. Those ReduceSinkOperators are called correlated ReduceSinkOperaotrs. * * @param child The child of the current operator * @param childKeyCols The key columns from the child operator * @param childPartitionCols The partitioning columns from the child operator * @param childRSOrder The sorting order of key columns from the child operator * @param current The current operator we are visiting * @param correlation The object keeps tracking the correlation * @return * @throws SemanticException */ private LinkedHashSet<ReduceSinkOperator> findCorrelatedReduceSinkOperators( Operator<? extends OperatorDesc> child, List<ExprNodeDesc> childKeyCols, List<ExprNodeDesc> childPartitionCols, String childRSOrder, Operator<? extends OperatorDesc> current, IntraQueryCorrelation correlation) throws SemanticException { LOG.info("now detecting operator " + current.getIdentifier() + " " + current.getName()); LinkedHashSet<ReduceSinkOperator> correlatedReduceSinkOperators = new LinkedHashSet<ReduceSinkOperator>(); if (skipedJoinOperators.contains(current)) { LOG.info(current.getName() + " " + current.getIdentifier() + " may be converted to MapJoin by " + "CommonJoinResolver. Correlation optimizer will not detect correlations" + "involved in this operator"); return correlatedReduceSinkOperators; } if ((current.getParentOperators() == null) || (current.getParentOperators().isEmpty())) { return correlatedReduceSinkOperators; } if (current instanceof PTFOperator) { // Currently, we do not support PTF operator. LOG.info("Currently, correlation optimizer does not support PTF operator."); return correlatedReduceSinkOperators; } if (current instanceof UnionOperator) { // If we get a UnionOperator, right now, we only handle it when // we can find correlated ReduceSinkOperators from all inputs. LinkedHashSet<ReduceSinkOperator> corrRSs = new LinkedHashSet<ReduceSinkOperator>(); for (Operator<? extends OperatorDesc> parent : current.getParentOperators()) { LinkedHashSet<ReduceSinkOperator> tmp = findCorrelatedReduceSinkOperators( current, childKeyCols, childPartitionCols, childRSOrder, parent, correlation); if (tmp != null && tmp.size() > 0) { corrRSs.addAll(tmp); } else { return correlatedReduceSinkOperators; } } correlatedReduceSinkOperators.addAll(corrRSs); UnionOperator union = (UnionOperator)current; union.getConf().setAllInputsInSameReducer(true); } else if (current.getColumnExprMap() == null && !(current instanceof ReduceSinkOperator)) { for (Operator<? extends OperatorDesc> parent : current.getParentOperators()) { correlatedReduceSinkOperators.addAll( findCorrelatedReduceSinkOperators( current, childKeyCols, childPartitionCols, childRSOrder, parent, correlation)); } } else if (current.getColumnExprMap() != null && !(current instanceof ReduceSinkOperator)) { List<ExprNodeDesc> backtrackedKeyCols = ExprNodeDescUtils.backtrack(childKeyCols, child, current); List<ExprNodeDesc> backtrackedPartitionCols = ExprNodeDescUtils.backtrack(childPartitionCols, child, current); RowSchema rowSchema = current.getSchema(); Set<String> tableNeedToCheck = new HashSet<String>(); for (ExprNodeDesc expr: childKeyCols) { if (!(expr instanceof ExprNodeColumnDesc)) { return correlatedReduceSinkOperators; } String colName = ((ExprNodeColumnDesc)expr).getColumn(); ColumnInfo columnInfo = rowSchema.getColumnInfo(colName); if (columnInfo != null) { tableNeedToCheck.add(columnInfo.getTabAlias()); } } if (current instanceof JoinOperator) { boolean isCorrelated = true; int expectedNumCorrelatedRsops = current.getParentOperators().size(); LinkedHashSet<ReduceSinkOperator> correlatedRsops = null; for (Operator<? extends OperatorDesc> parent : current.getParentOperators()) { Set<String> tableNames = parent.getSchema().getTableNames(); for (String tbl : tableNames) { if (tableNeedToCheck.contains(tbl)) { correlatedRsops = findCorrelatedReduceSinkOperators(current, backtrackedKeyCols, backtrackedPartitionCols, childRSOrder, parent, correlation); if (correlatedRsops.size() != expectedNumCorrelatedRsops) { isCorrelated = false; } } } if (!isCorrelated) { break; } } // If current is JoinOperaotr, we will stop to traverse the tree // when any of parent ReduceSinkOperaotr of this JoinOperator is // not considered as a correlated ReduceSinkOperator. if (isCorrelated && correlatedRsops != null) { correlatedReduceSinkOperators.addAll(correlatedRsops); } else { correlatedReduceSinkOperators.clear(); } } else { for (Operator<? extends OperatorDesc> parent : current.getParentOperators()) { correlatedReduceSinkOperators.addAll(findCorrelatedReduceSinkOperators( current, backtrackedKeyCols, backtrackedPartitionCols, childRSOrder, parent, correlation)); } } } else if (current.getColumnExprMap() != null && current instanceof ReduceSinkOperator) { ReduceSinkOperator rsop = (ReduceSinkOperator) current; List<ExprNodeDesc> backtrackedKeyCols = ExprNodeDescUtils.backtrack(childKeyCols, child, current); List<ExprNodeDesc> backtrackedPartitionCols = ExprNodeDescUtils.backtrack(childPartitionCols, child, current); List<ExprNodeDesc> rsKeyCols = rsop.getConf().getKeyCols(); List<ExprNodeDesc> rsPartitionCols = rsop.getConf().getPartitionCols(); // Two ReduceSinkOperators are correlated means that // they have same sorting columns (key columns), same partitioning columns, // same sorting orders, and no conflict on the numbers of reducers. // TODO: we should relax this condition // TODO: we need to handle aggregation functions with distinct keyword. In this case, // distinct columns will be added to the key columns. boolean isCorrelated = sameKeys(rsKeyCols, backtrackedKeyCols) && sameOrder(rsop.getConf().getOrder(), childRSOrder) && sameKeys(backtrackedPartitionCols, rsPartitionCols) && correlation.adjustNumReducers(rsop.getConf().getNumReducers()); GroupByOperator cGBY = CorrelationUtilities.getSingleChild(rsop, GroupByOperator.class); if (cGBY != null) { if (CorrelationUtilities.hasGroupingSet(rsop) || cGBY.getConf().isGroupingSetsPresent()) { // Do not support grouping set right now isCorrelated = false; } } if (isCorrelated) { LOG.info("Operator " + current.getIdentifier() + " " + current.getName() + " is correlated"); Operator<? extends OperatorDesc> childOperator = CorrelationUtilities.getSingleChild(current, true); if (childOperator instanceof JoinOperator) { JoinOperator joinOp = (JoinOperator) childOperator; JoinCondDesc[] joinConds = joinOp.getConf().getConds(); List<Operator<? extends OperatorDesc>> rsOps = joinOp.getParentOperators(); LinkedHashSet<ReduceSinkOperator> correlatedRsOps = new LinkedHashSet<ReduceSinkOperator>(); analyzeReduceSinkOperatorsOfJoinOperator(joinConds, rsOps, current, correlatedRsOps); correlatedReduceSinkOperators.addAll(correlatedRsOps); } else { correlatedReduceSinkOperators.add(rsop); } } else { LOG.info("Operator " + current.getIdentifier() + " " + current.getName() + " is not correlated"); correlatedReduceSinkOperators.clear(); } } else { LOG.error("ReduceSinkOperator " + current.getIdentifier() + " does not have ColumnExprMap"); throw new SemanticException("CorrelationOptimizer cannot optimize this plan. " + "ReduceSinkOperator " + current.getIdentifier() + " does not have ColumnExprMap"); } return correlatedReduceSinkOperators; } /** Start to exploit Job Flow Correlation from op. * Example: here is the operator tree we have ... * JOIN2 * / \ * RS4 RS5 * / \ * GBY1 JOIN1 * | / \ * RS1 RS2 RS3 * The op will be RS4. If we can execute GBY1, JOIN1, and JOIN2 in * the same reducer. This method will return [RS1, RS2, RS3]. * @param op * @param correlationCtx * @param correlation * @return * @throws SemanticException */ private LinkedHashSet<ReduceSinkOperator> exploitJobFlowCorrelation(ReduceSinkOperator op, CorrelationNodeProcCtx correlationCtx, IntraQueryCorrelation correlation) throws SemanticException { correlationCtx.addWalked(op); correlation.addToAllReduceSinkOperators(op); boolean shouldDetect = true; LinkedHashSet<ReduceSinkOperator> reduceSinkOperators = new LinkedHashSet<ReduceSinkOperator>(); List<ExprNodeDesc> keyCols = op.getConf().getKeyCols(); List<ExprNodeDesc> partitionCols = op.getConf().getPartitionCols(); for (ExprNodeDesc key : keyCols) { if (!(key instanceof ExprNodeColumnDesc)) { shouldDetect = false; } } for (ExprNodeDesc key : partitionCols) { if (!(key instanceof ExprNodeColumnDesc)) { shouldDetect = false; } } GroupByOperator cGBY = CorrelationUtilities.getSingleChild(op, GroupByOperator.class); if (cGBY != null) { if (CorrelationUtilities.hasGroupingSet(op) || cGBY.getConf().isGroupingSetsPresent()) { // Do not support grouping set right now shouldDetect = false; } } if (shouldDetect) { LinkedHashSet<ReduceSinkOperator> newReduceSinkOperators = new LinkedHashSet<ReduceSinkOperator>(); String sortOrder = op.getConf().getOrder(); for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) { LOG.info("Operator " + op.getIdentifier() + ": start detecting correlation from this operator"); LinkedHashSet<ReduceSinkOperator> correlatedReduceSinkOperators = findCorrelatedReduceSinkOperators(op, keyCols, partitionCols, sortOrder, parent, correlation); if (correlatedReduceSinkOperators.size() == 0) { newReduceSinkOperators.add(op); } else { for (ReduceSinkOperator rsop : correlatedReduceSinkOperators) { LinkedHashSet<ReduceSinkOperator> exploited = exploitJobFlowCorrelation(rsop, correlationCtx, correlation); if (exploited.size() == 0) { newReduceSinkOperators.add(rsop); } else { newReduceSinkOperators.addAll(exploited); } } } } reduceSinkOperators.addAll(newReduceSinkOperators); } return reduceSinkOperators; } public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { CorrelationNodeProcCtx corrCtx = (CorrelationNodeProcCtx) ctx; ReduceSinkOperator op = (ReduceSinkOperator) nd; // Check if we have visited this operator if (corrCtx.isWalked(op)) { return null; } LOG.info("Walk to operator " + op.getIdentifier() + " " + op.getName()); Operator<? extends OperatorDesc> child = CorrelationUtilities.getSingleChild(op, true); if (!(child instanceof JoinOperator) && !(child instanceof GroupByOperator)) { corrCtx.addWalked(op); return null; } // detect correlations IntraQueryCorrelation correlation = new IntraQueryCorrelation(corrCtx.minReducer()); List<ReduceSinkOperator> topReduceSinkOperators = CorrelationUtilities.findSiblingReduceSinkOperators(op); List<ReduceSinkOperator> bottomReduceSinkOperators = new ArrayList<ReduceSinkOperator>(); // Adjust the number of reducers of this correlation based on // those top layer ReduceSinkOperators. for (ReduceSinkOperator rsop : topReduceSinkOperators) { if (!correlation.adjustNumReducers(rsop.getConf().getNumReducers())) { // If we have a conflict on the number of reducers, we will not optimize // this plan from here. corrCtx.addWalked(op); return null; } } for (ReduceSinkOperator rsop : topReduceSinkOperators) { LinkedHashSet<ReduceSinkOperator> thisBottomReduceSinkOperators = exploitJobFlowCorrelation(rsop, corrCtx, correlation); if (thisBottomReduceSinkOperators.size() == 0) { thisBottomReduceSinkOperators.add(rsop); } bottomReduceSinkOperators.addAll(thisBottomReduceSinkOperators); } if (!topReduceSinkOperators.containsAll(bottomReduceSinkOperators)) { LOG.info("has job flow correlation"); correlation.setJobFlowCorrelation(true, bottomReduceSinkOperators); } if (correlation.hasJobFlowCorrelation()) { corrCtx.addCorrelation(correlation); } else { // Since we cannot merge operators into a single MR job from here, // we should remove ReduceSinkOperators added into walked in exploitJFC corrCtx.removeWalkedAll(correlation.getAllReduceSinkOperators()); } corrCtx.addWalked(op); return null; } } private NodeProcessor getDefaultProc() { return new NodeProcessor() { @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd; LOG.info("Walk to operator " + op.getIdentifier() + " " + op.getName() + ". No actual work to do"); CorrelationNodeProcCtx correlationCtx = (CorrelationNodeProcCtx) ctx; if (op.getName().equals(MapJoinOperator.getOperatorName())) { correlationCtx.setAbort(true); correlationCtx.getAbortReasons().add("Found MAPJOIN"); } if (op.getName().equals(FileSinkOperator.getOperatorName())) { correlationCtx.incrementFileSinkOperatorCount(); } return null; } }; } protected class CorrelationNodeProcCtx extends AbstractCorrelationProcCtx { private boolean abort; private final List<String> abortReasons; private final Set<ReduceSinkOperator> walked; private final List<IntraQueryCorrelation> correlations; private int fileSinkOperatorCount; public CorrelationNodeProcCtx(ParseContext pctx) { super(pctx); walked = new HashSet<ReduceSinkOperator>(); correlations = new ArrayList<IntraQueryCorrelation>(); abort = false; abortReasons = new ArrayList<String>(); fileSinkOperatorCount = 0; } public void setAbort(boolean abort) { this.abort = abort; } public boolean isAbort() { return abort; } public List<String> getAbortReasons() { return abortReasons; } public void addCorrelation(IntraQueryCorrelation correlation) { correlations.add(correlation); } public List<IntraQueryCorrelation> getCorrelations() { return correlations; } public boolean isWalked(ReduceSinkOperator op) { return walked.contains(op); } public void addWalked(ReduceSinkOperator op) { walked.add(op); } public void addWalkedAll(Collection<ReduceSinkOperator> c) { walked.addAll(c); } public void removeWalked(ReduceSinkOperator op) { walked.remove(op); } public void removeWalkedAll(Collection<ReduceSinkOperator> c) { walked.removeAll(c); } public void incrementFileSinkOperatorCount() { fileSinkOperatorCount++; if (fileSinkOperatorCount == 2) { abort = true; abortReasons.add( "-- Currently, a query with multiple FileSinkOperators are not supported."); } } } }