/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.plan; import java.lang.reflect.Array; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.parse.QBJoinTree; import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * Join operator Descriptor implementation. * */ @Explain(displayName = "Join Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class JoinDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; public static final int INNER_JOIN = 0; public static final int LEFT_OUTER_JOIN = 1; public static final int RIGHT_OUTER_JOIN = 2; public static final int FULL_OUTER_JOIN = 3; public static final int UNIQUE_JOIN = 4; public static final int LEFT_SEMI_JOIN = 5; // used to handle skew join private boolean handleSkewJoin = false; private int skewKeyDefinition = -1; private Map<Byte, Path> bigKeysDirMap; private Map<Byte, Map<Byte, Path>> smallKeysDirMap; private Map<Byte, TableDesc> skewKeysValuesTables; // alias to key mapping private Map<Byte, List<ExprNodeDesc>> exprs; // alias to filter mapping private Map<Byte, List<ExprNodeDesc>> filters; private List<ExprNodeDesc> residualFilterExprs; // pos of outer join alias=<pos of other alias:num of filters on outer join alias>xn // for example, // a left outer join b on a.k=b.k AND a.k>5 full outer join c on a.k=c.k AND a.k>10 AND c.k>20 // // That means on a(pos=0), there are overlapped filters associated with b(pos=1) and c(pos=2). // (a)b has one filter on a (a.k>5) and (a)c also has one filter on a (a.k>10), // making filter map for a as 0=1:1:2:1. // C also has one outer join filter associated with A(c.k>20), which is making 2=0:1 private int[][] filterMap; // key index to nullsafe join flag private boolean[] nullsafes; // used for create joinOutputObjectInspector protected List<String> outputColumnNames; // key:column output name, value:tag private transient Map<String, Byte> reversedExprs; // No outer join involved protected boolean noOuterJoin; protected JoinCondDesc[] conds; protected Byte[] tagOrder; private TableDesc keyTableDesc; // this operator cannot be converted to mapjoin cause output is expected to be sorted on join key // it's resulted from RS-dedup optimization, which removes following RS under some condition private boolean fixedAsSorted; // used only for explain. private transient ExprNodeDesc [][] joinKeys; // Data structures coming originally from QBJoinTree private transient String leftAlias; private transient String[] leftAliases; private transient String[] rightAliases; private transient String[] baseSrc; private transient String id; private transient boolean mapSideJoin; private transient List<String> mapAliases; //map-side join aliases private transient Map<String, Operator<? extends OperatorDesc>> aliasToOpInfo; private transient boolean leftInputJoin; private transient List<String> streamAliases; // represents the total memory that this Join operator will use if it is a MapJoin operator protected transient long inMemoryDataSize; // non-transient field, used at runtime to kill a task if it exceeded memory limits when running in LLAP protected MemoryMonitorInfo memoryMonitorInfo; public JoinDesc() { } public JoinDesc(final Map<Byte, List<ExprNodeDesc>> exprs, List<String> outputColumnNames, final boolean noOuterJoin, final JoinCondDesc[] conds, final Map<Byte, List<ExprNodeDesc>> filters, ExprNodeDesc[][] joinKeys, final MemoryMonitorInfo memoryMonitorInfo) { this.exprs = exprs; this.outputColumnNames = outputColumnNames; this.noOuterJoin = noOuterJoin; this.conds = conds; this.filters = filters; this.joinKeys = joinKeys; this.memoryMonitorInfo = memoryMonitorInfo; resetOrder(); } // called by late-MapJoin processor (hive.auto.convert.join=true for example) public void resetOrder() { tagOrder = new Byte[exprs.size()]; for (int i = 0; i < tagOrder.length; i++) { tagOrder[i] = (byte) i; } } @Override public Object clone() { JoinDesc ret = new JoinDesc(); Map<Byte,List<ExprNodeDesc>> cloneExprs = new HashMap<Byte,List<ExprNodeDesc>>(); cloneExprs.putAll(getExprs()); ret.setExprs(cloneExprs); Map<Byte,List<ExprNodeDesc>> cloneFilters = new HashMap<Byte,List<ExprNodeDesc>>(); cloneFilters.putAll(getFilters()); ret.setFilters(cloneFilters); ret.setConds(getConds().clone()); ret.setNoOuterJoin(getNoOuterJoin()); ret.setNullSafes(getNullSafes()); ret.setHandleSkewJoin(handleSkewJoin); ret.setSkewKeyDefinition(getSkewKeyDefinition()); ret.setTagOrder(getTagOrder().clone()); if (getMemoryMonitorInfo() != null) { ret.setMemoryMonitorInfo(new MemoryMonitorInfo(getMemoryMonitorInfo())); } if (getKeyTableDesc() != null) { ret.setKeyTableDesc((TableDesc) getKeyTableDesc().clone()); } if (getBigKeysDirMap() != null) { Map<Byte, Path> cloneBigKeysDirMap = new HashMap<Byte, Path>(); cloneBigKeysDirMap.putAll(getBigKeysDirMap()); ret.setBigKeysDirMap(cloneBigKeysDirMap); } if (getSmallKeysDirMap() != null) { Map<Byte, Map<Byte, Path>> cloneSmallKeysDirMap = new HashMap<Byte, Map<Byte,Path>> (); cloneSmallKeysDirMap.putAll(getSmallKeysDirMap()); ret.setSmallKeysDirMap(cloneSmallKeysDirMap); } if (getSkewKeysValuesTables() != null) { Map<Byte, TableDesc> cloneSkewKeysValuesTables = new HashMap<Byte, TableDesc>(); cloneSkewKeysValuesTables.putAll(getSkewKeysValuesTables()); ret.setSkewKeysValuesTables(cloneSkewKeysValuesTables); } if (getOutputColumnNames() != null) { List<String> cloneOutputColumnNames = new ArrayList<String>(); cloneOutputColumnNames.addAll(getOutputColumnNames()); ret.setOutputColumnNames(cloneOutputColumnNames); } if (getReversedExprs() != null) { Map<String, Byte> cloneReversedExprs = new HashMap<String, Byte>(); cloneReversedExprs.putAll(getReversedExprs()); ret.setReversedExprs(cloneReversedExprs); } return ret; } public JoinDesc(JoinDesc clone) { this.bigKeysDirMap = clone.bigKeysDirMap; this.conds = clone.conds; this.exprs = clone.exprs; this.nullsafes = clone.nullsafes; this.handleSkewJoin = clone.handleSkewJoin; this.keyTableDesc = clone.keyTableDesc; this.noOuterJoin = clone.noOuterJoin; this.outputColumnNames = clone.outputColumnNames; this.reversedExprs = clone.reversedExprs; this.skewKeyDefinition = clone.skewKeyDefinition; this.skewKeysValuesTables = clone.skewKeysValuesTables; this.smallKeysDirMap = clone.smallKeysDirMap; this.tagOrder = clone.tagOrder; this.filters = clone.filters; this.filterMap = clone.filterMap; this.residualFilterExprs = clone.residualFilterExprs; this.statistics = clone.statistics; this.inMemoryDataSize = clone.inMemoryDataSize; this.memoryMonitorInfo = clone.memoryMonitorInfo; } public Map<Byte, List<ExprNodeDesc>> getExprs() { return exprs; } public Map<String, Byte> getReversedExprs() { return reversedExprs; } public void setReversedExprs(Map<String, Byte> reversedExprs) { this.reversedExprs = reversedExprs; } /** * @return the keys in string form */ @Explain(displayName = "keys") public Map<Byte, String> getKeysString() { if (joinKeys == null) { return null; } Map<Byte, String> keyMap = new LinkedHashMap<Byte, String>(); for (byte i = 0; i < joinKeys.length; i++) { keyMap.put(i, PlanUtils.getExprListString(Arrays.asList(joinKeys[i]))); } return keyMap; } @Explain(displayName = "keys", explainLevels = { Level.USER }) public Map<Byte, String> getUserLevelExplainKeysString() { if (joinKeys == null) { return null; } Map<Byte, String> keyMap = new LinkedHashMap<Byte, String>(); for (byte i = 0; i < joinKeys.length; i++) { keyMap.put(i, PlanUtils.getExprListString(Arrays.asList(joinKeys[i]), true)); } return keyMap; } public void setExprs(final Map<Byte, List<ExprNodeDesc>> exprs) { this.exprs = exprs; } /** * Get the string representation of filters. * * Returns null if they are no filters. * * @return Map from alias to filters on the alias. */ @Explain(displayName = "filter predicates") public Map<Byte, String> getFiltersStringMap() { if (getFilters() == null || getFilters().size() == 0) { return null; } LinkedHashMap<Byte, String> ret = new LinkedHashMap<Byte, String>(); boolean filtersPresent = false; for (Map.Entry<Byte, List<ExprNodeDesc>> ent : getFilters().entrySet()) { StringBuilder sb = new StringBuilder(); boolean first = true; if (ent.getValue() != null) { if (ent.getValue().size() != 0) { filtersPresent = true; } for (ExprNodeDesc expr : ent.getValue()) { if (!first) { sb.append(" "); } first = false; sb.append("{"); sb.append(expr.getExprString()); sb.append("}"); } } ret.put(ent.getKey(), sb.toString()); } if (filtersPresent) { return ret; } else { return null; } } public Map<Byte, List<ExprNodeDesc>> getFilters() { return filters; } public void setFilters(Map<Byte, List<ExprNodeDesc>> filters) { this.filters = filters; } @Explain(displayName = "residual filter predicates", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getResidualFilterExprsString() { if (getResidualFilterExprs() == null || getResidualFilterExprs().size() == 0) { return null; } StringBuilder sb = new StringBuilder(); boolean first = true; for (ExprNodeDesc expr : getResidualFilterExprs()) { if (!first) { sb.append(" "); } first = false; sb.append("{"); sb.append(expr.getExprString()); sb.append("}"); } return sb.toString(); } public List<ExprNodeDesc> getResidualFilterExprs() { return residualFilterExprs; } public void setResidualFilterExprs(List<ExprNodeDesc> residualFilterExprs) { this.residualFilterExprs = residualFilterExprs; } @Explain(displayName = "outputColumnNames") public List<String> getOutputColumnNames() { return outputColumnNames; } @Explain(displayName = "Output", explainLevels = { Level.USER }) public List<String> getUserLevelExplainOutputColumnNames() { return outputColumnNames; } public void setOutputColumnNames( List<String> outputColumnNames) { this.outputColumnNames = outputColumnNames; } public boolean getNoOuterJoin() { return noOuterJoin; } public void setNoOuterJoin(final boolean noOuterJoin) { this.noOuterJoin = noOuterJoin; } @Explain(displayName = "condition map", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List<JoinCondDesc> getCondsList() { if (conds == null) { return null; } ArrayList<JoinCondDesc> l = new ArrayList<JoinCondDesc>(); for (JoinCondDesc cond : conds) { l.add(cond); } return l; } public ExprNodeDesc [][] getJoinKeys() { return joinKeys; } public JoinCondDesc[] getConds() { return conds; } public void setConds(final JoinCondDesc[] conds) { this.conds = conds; } /** * The order in which tables should be processed when joining. * * @return Array of tags */ public Byte[] getTagOrder() { return tagOrder; } /** * The order in which tables should be processed when joining. * * @param tagOrder * Array of tags */ public void setTagOrder(Byte[] tagOrder) { this.tagOrder = tagOrder; } @Explain(displayName = "handleSkewJoin", displayOnlyOnTrue = true) public boolean getHandleSkewJoin() { return handleSkewJoin; } /** * set to handle skew join in this join op. * * @param handleSkewJoin */ public void setHandleSkewJoin(boolean handleSkewJoin) { this.handleSkewJoin = handleSkewJoin; } /** * @return mapping from tbl to dir for big keys. */ public Map<Byte, Path> getBigKeysDirMap() { return bigKeysDirMap; } /** * set the mapping from tbl to dir for big keys. * * @param bigKeysDirMap */ public void setBigKeysDirMap(Map<Byte, Path> bigKeysDirMap) { this.bigKeysDirMap = bigKeysDirMap; } /** * @return mapping from tbl to dir for small keys */ public Map<Byte, Map<Byte, Path>> getSmallKeysDirMap() { return smallKeysDirMap; } /** * set the mapping from tbl to dir for small keys. * * @param smallKeysDirMap */ public void setSmallKeysDirMap(Map<Byte, Map<Byte, Path>> smallKeysDirMap) { this.smallKeysDirMap = smallKeysDirMap; } /** * @return skew key definition. If we see a key's associated entries' number * is bigger than this, we will define this key as a skew key. */ public int getSkewKeyDefinition() { return skewKeyDefinition; } /** * set skew key definition. * * @param skewKeyDefinition */ public void setSkewKeyDefinition(int skewKeyDefinition) { this.skewKeyDefinition = skewKeyDefinition; } /** * @return the table desc for storing skew keys and their corresponding value; */ public Map<Byte, TableDesc> getSkewKeysValuesTables() { return skewKeysValuesTables; } /** * @param skewKeysValuesTables * set the table desc for storing skew keys and their corresponding * value; */ public void setSkewKeysValuesTables(Map<Byte, TableDesc> skewKeysValuesTables) { this.skewKeysValuesTables = skewKeysValuesTables; } public boolean isNoOuterJoin() { return noOuterJoin; } public void setKeyTableDesc(TableDesc keyTblDesc) { keyTableDesc = keyTblDesc; } public TableDesc getKeyTableDesc() { return keyTableDesc; } public boolean[] getNullSafes() { return nullsafes; } public void setNullSafes(boolean[] nullSafes) { this.nullsafes = nullSafes; } @Explain(displayName = "nullSafes") public String getNullSafeString() { if (nullsafes == null) { return null; } boolean hasNS = false; for (boolean ns : nullsafes) { hasNS |= ns; } return hasNS ? Arrays.toString(nullsafes) : null; } public int[][] getFilterMap() { return filterMap; } public void setFilterMap(int[][] filterMap) { this.filterMap = filterMap; } @Explain(displayName = "filter mappings", explainLevels = { Level.EXTENDED }) public Map<Integer, String> getFilterMapString() { return toCompactString(filterMap); } protected Map<Integer, String> toCompactString(int[][] filterMap) { if (filterMap == null) { return null; } filterMap = compactFilter(filterMap); Map<Integer, String> result = new LinkedHashMap<Integer, String>(); for (int i = 0 ; i < filterMap.length; i++) { if (filterMap[i] == null) { continue; } result.put(i, Arrays.toString(filterMap[i])); } return result.isEmpty() ? null : result; } // remove filterMap for outer alias if filter is not exist on that private int[][] compactFilter(int[][] filterMap) { if (filterMap == null) { return null; } for (int i = 0; i < filterMap.length; i++) { if (filterMap[i] != null) { boolean noFilter = true; // join positions for even index, filter lengths for odd index for (int j = 1; j < filterMap[i].length; j += 2) { if (filterMap[i][j] > 0) { noFilter = false; break; } } if (noFilter) { filterMap[i] = null; } } } for (int[] mapping : filterMap) { if (mapping != null) { return filterMap; } } return null; } public int getTagLength() { int tagLength = -1; for (byte tag : getExprs().keySet()) { tagLength = Math.max(tagLength, tag + 1); } return tagLength; } @SuppressWarnings("unchecked") public <T> T[] convertToArray(Map<Byte, T> source, Class<T> compType) { T[] result = (T[]) Array.newInstance(compType, getTagLength()); for (Map.Entry<Byte, T> entry : source.entrySet()) { result[entry.getKey()] = entry.getValue(); } return result; } public boolean isFixedAsSorted() { return fixedAsSorted; } public void setFixedAsSorted(boolean fixedAsSorted) { this.fixedAsSorted = fixedAsSorted; } public String[] getLeftAliases() { return leftAliases; } public String[] getBaseSrc() { return baseSrc; } public void setBaseSrc(String[] baseSrc) { this.baseSrc = baseSrc; } public String getId() { return id; } public List<String> getMapAliases() { return mapAliases; } public Map<String, Operator<? extends OperatorDesc>> getAliasToOpInfo() { return aliasToOpInfo; } public void setAliasToOpInfo(Map<String, Operator<? extends OperatorDesc>> aliasToOpInfo) { this.aliasToOpInfo = aliasToOpInfo; } public boolean isLeftInputJoin() { return leftInputJoin; } public String getLeftAlias() { return leftAlias; } public void setLeftAlias(String leftAlias) { this.leftAlias = leftAlias; } public String[] getRightAliases() { return rightAliases; } public void setRightAliases(String[] rightAliases) { this.rightAliases = rightAliases; } public List<String> getStreamAliases() { return streamAliases; } public boolean isMapSideJoin() { return mapSideJoin; } public void setQBJoinTreeProps(JoinDesc joinDesc) { leftAlias = joinDesc.leftAlias; leftAliases = joinDesc.leftAliases; rightAliases = joinDesc.rightAliases; baseSrc = joinDesc.baseSrc; id = joinDesc.id; mapSideJoin = joinDesc.mapSideJoin; mapAliases = joinDesc.mapAliases; aliasToOpInfo = joinDesc.aliasToOpInfo; leftInputJoin = joinDesc.leftInputJoin; streamAliases = joinDesc.streamAliases; } public void setQBJoinTreeProps(QBJoinTree joinTree) { leftAlias = joinTree.getLeftAlias(); leftAliases = joinTree.getLeftAliases(); rightAliases = joinTree.getRightAliases(); baseSrc = joinTree.getBaseSrc(); id = joinTree.getId(); mapSideJoin = joinTree.isMapSideJoin(); mapAliases = joinTree.getMapAliases(); aliasToOpInfo = joinTree.getAliasToOpInfo(); leftInputJoin = joinTree.getJoinSrc() != null; streamAliases = joinTree.getStreamAliases(); } public void cloneQBJoinTreeProps(JoinDesc joinDesc) { leftAlias = joinDesc.leftAlias; leftAliases = joinDesc.leftAliases == null ? null : joinDesc.leftAliases.clone(); rightAliases = joinDesc.rightAliases == null ? null : joinDesc.rightAliases.clone(); baseSrc = joinDesc.baseSrc == null ? null : joinDesc.baseSrc.clone(); id = joinDesc.id; mapSideJoin = joinDesc.mapSideJoin; mapAliases = joinDesc.mapAliases == null ? null : new ArrayList<String>(joinDesc.mapAliases); aliasToOpInfo = new HashMap<String, Operator<? extends OperatorDesc>>(joinDesc.aliasToOpInfo); leftInputJoin = joinDesc.leftInputJoin; streamAliases = joinDesc.streamAliases == null ? null : new ArrayList<String>(joinDesc.streamAliases); } public MemoryMonitorInfo getMemoryMonitorInfo() { return memoryMonitorInfo; } public void setMemoryMonitorInfo(final MemoryMonitorInfo memoryMonitorInfo) { this.memoryMonitorInfo = memoryMonitorInfo; } public long getInMemoryDataSize() { return inMemoryDataSize; } public void setInMemoryDataSize(final long inMemoryDataSize) { this.inMemoryDataSize = inMemoryDataSize; } }