/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.RowContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.BooleanWritable; /** * Join operator implementation. */ public abstract class CommonJoinOperator<T extends JoinDesc> extends Operator<T> implements Serializable { private static final long serialVersionUID = 1L; protected static final Log LOG = LogFactory.getLog(CommonJoinOperator.class .getName()); /** * IntermediateObject. * */ public static class IntermediateObject { ArrayList<Object>[] objs; int curSize; public IntermediateObject(ArrayList<Object>[] objs, int curSize) { this.objs = objs; this.curSize = curSize; } public ArrayList<Object>[] getObjs() { return objs; } public int getCurSize() { return curSize; } public void pushObj(ArrayList<Object> newObj) { objs[curSize++] = newObj; } public void popObj() { curSize--; } public Object topObj() { return objs[curSize - 1]; } } protected transient int numAliases; // number of aliases /** * The expressions for join inputs. */ protected transient Map<Byte, List<ExprNodeEvaluator>> joinValues; /** * The filters for join */ protected transient Map<Byte, List<ExprNodeEvaluator>> joinFilters; /** * The ObjectInspectors for the join inputs. */ protected transient Map<Byte, List<ObjectInspector>> joinValuesObjectInspectors; /** * The ObjectInspectors for join filters. */ protected transient Map<Byte, List<ObjectInspector>> joinFilterObjectInspectors; /** * The standard ObjectInspectors for the join inputs. */ protected transient Map<Byte, List<ObjectInspector>> joinValuesStandardObjectInspectors; /** * The standard ObjectInspectors for the row container. */ protected transient Map<Byte, List<ObjectInspector>> rowContainerStandardObjectInspectors; protected transient Byte[] order; // order in which the results should // be output protected transient JoinCondDesc[] condn; protected transient boolean[] nullsafes; public transient boolean noOuterJoin; protected transient Object[] dummyObj; // for outer joins, contains the // potential nulls for the concerned // aliases protected transient RowContainer<ArrayList<Object>>[] dummyObjVectors; // empty // rows // for // each // table protected transient int totalSz; // total size of the composite object // keys are the column names. basically this maps the position of the column // in // the output of the CommonJoinOperator to the input columnInfo. private transient Map<Integer, Set<String>> posToAliasMap; transient LazyBinarySerDe[] spillTableSerDe; protected transient Map<Byte, TableDesc> spillTableDesc; // spill tables are // used if the join // input is too large // to fit in memory HashMap<Byte, AbstractRowContainer<ArrayList<Object>>> storage; // map b/w table alias // to RowContainer int joinEmitInterval = -1; int joinCacheSize = 0; int nextSz = 0; transient Byte lastAlias = null; transient boolean handleSkewJoin = false; transient boolean hasLeftSemiJoin = false; protected transient int countAfterReport; protected transient int heartbeatInterval; protected static final int NOTSKIPBIGTABLE = -1; public CommonJoinOperator() { } public CommonJoinOperator(CommonJoinOperator<T> clone) { this.joinEmitInterval = clone.joinEmitInterval; this.joinCacheSize = clone.joinCacheSize; this.nextSz = clone.nextSz; this.childOperators = clone.childOperators; this.parentOperators = clone.parentOperators; this.counterNames = clone.counterNames; this.counterNameToEnum = clone.counterNameToEnum; this.done = clone.done; this.operatorId = clone.operatorId; this.storage = clone.storage; this.condn = clone.condn; this.conf = clone.getConf(); this.setSchema(clone.getSchema()); this.alias = clone.alias; this.beginTime = clone.beginTime; this.inputRows = clone.inputRows; this.childOperatorsArray = clone.childOperatorsArray; this.childOperatorsTag = clone.childOperatorsTag; this.colExprMap = clone.colExprMap; this.counters = clone.counters; this.dummyObj = clone.dummyObj; this.dummyObjVectors = clone.dummyObjVectors; this.forwardCache = clone.forwardCache; this.groupKeyObject = clone.groupKeyObject; this.handleSkewJoin = clone.handleSkewJoin; this.hconf = clone.hconf; this.id = clone.id; this.inputObjInspectors = clone.inputObjInspectors; this.inputRows = clone.inputRows; this.noOuterJoin = clone.noOuterJoin; this.numAliases = clone.numAliases; this.operatorId = clone.operatorId; this.posToAliasMap = clone.posToAliasMap; this.spillTableDesc = clone.spillTableDesc; this.statsMap = clone.statsMap; this.joinFilters = clone.joinFilters; this.joinFilterObjectInspectors = clone.joinFilterObjectInspectors; } protected static <T extends JoinDesc> ObjectInspector getJoinOutputObjectInspector( Byte[] order, Map<Byte, List<ObjectInspector>> aliasToObjectInspectors, T conf) { ArrayList<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>(); for (Byte alias : order) { List<ObjectInspector> oiList = aliasToObjectInspectors.get(alias); structFieldObjectInspectors.addAll(oiList); } StructObjectInspector joinOutputObjectInspector = ObjectInspectorFactory .getStandardStructObjectInspector(conf.getOutputColumnNames(), structFieldObjectInspectors); return joinOutputObjectInspector; } Configuration hconf; @Override protected void initializeOp(Configuration hconf) throws HiveException { this.handleSkewJoin = conf.getHandleSkewJoin(); this.hconf = hconf; heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT); countAfterReport = 0; totalSz = 0; // Map that contains the rows for each alias storage = new HashMap<Byte, AbstractRowContainer<ArrayList<Object>>>(); numAliases = conf.getExprs().size(); joinValues = new HashMap<Byte, List<ExprNodeEvaluator>>(); joinFilters = new HashMap<Byte, List<ExprNodeEvaluator>>(); order = conf.getTagOrder(); condn = conf.getConds(); nullsafes = conf.getNullSafes(); noOuterJoin = conf.isNoOuterJoin(); totalSz = JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), order,NOTSKIPBIGTABLE); //process join filters joinFilters = new HashMap<Byte, List<ExprNodeEvaluator>>(); JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(),order,NOTSKIPBIGTABLE); joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors,NOTSKIPBIGTABLE); joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors,NOTSKIPBIGTABLE); joinValuesStandardObjectInspectors = JoinUtil.getStandardObjectInspectors( joinValuesObjectInspectors,NOTSKIPBIGTABLE); if (noOuterJoin) { rowContainerStandardObjectInspectors = joinValuesStandardObjectInspectors; } else { Map<Byte, List<ObjectInspector>> rowContainerObjectInspectors = new HashMap<Byte, List<ObjectInspector>>(); for (Byte alias : order) { ArrayList<ObjectInspector> rcOIs = new ArrayList<ObjectInspector>(); rcOIs.addAll(joinValuesObjectInspectors.get(alias)); // for each alias, add object inspector for boolean as the last element rcOIs.add( PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); rowContainerObjectInspectors.put(alias, rcOIs); } rowContainerStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(rowContainerObjectInspectors,NOTSKIPBIGTABLE); } dummyObj = new Object[numAliases]; dummyObjVectors = new RowContainer[numAliases]; joinEmitInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEJOINEMITINTERVAL); joinCacheSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEJOINCACHESIZE); // construct dummy null row (indicating empty table) and // construct spill table serde which is used if input is too // large to fit into main memory. byte pos = 0; for (Byte alias : order) { int sz = conf.getExprs().get(alias).size(); ArrayList<Object> nr = new ArrayList<Object>(sz); for (int j = 0; j < sz; j++) { nr.add(null); } if (!noOuterJoin) { // add whether the row is filtered or not // this value does not matter for the dummyObj // because the join values are already null nr.add(new BooleanWritable(false)); } dummyObj[pos] = nr; // there should be only 1 dummy object in the RowContainer RowContainer<ArrayList<Object>> values = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors.get((byte)pos), alias, 1, spillTableDesc, conf, noOuterJoin); values.add((ArrayList<Object>) dummyObj[pos]); dummyObjVectors[pos] = values; // if serde is null, the input doesn't need to be spilled out // e.g., the output columns does not contains the input table RowContainer rc = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors.get((byte)pos), alias, joinCacheSize,spillTableDesc, conf,noOuterJoin); storage.put(pos, rc); pos++; } forwardCache = new Object[totalSz]; outputObjInspector = getJoinOutputObjectInspector(order, joinValuesStandardObjectInspectors, conf); for( int i = 0; i < condn.length; i++ ) { if(condn[i].getType() == JoinDesc.LEFT_SEMI_JOIN) { hasLeftSemiJoin = true; } } LOG.info("JOIN " + ((StructObjectInspector) outputObjInspector).getTypeName() + " totalsz = " + totalSz); } transient boolean newGroupStarted = false; @Override public void startGroup() throws HiveException { LOG.trace("Join: Starting new group"); newGroupStarted = true; for (AbstractRowContainer<ArrayList<Object>> alw : storage.values()) { alw.clear(); } } protected int getNextSize(int sz) { // A very simple counter to keep track of join entries for a key if (sz >= 100000) { return sz + 100000; } return 2 * sz; } protected transient Byte alias; transient Object[] forwardCache; private void createForwardJoinObject(IntermediateObject intObj, boolean[] nullsArr) throws HiveException { int p = 0; for (int i = 0; i < numAliases; i++) { Byte alias = order[i]; int sz = joinValues.get(alias).size(); if (nullsArr[i]) { for (int j = 0; j < sz; j++) { forwardCache[p++] = null; } } else { ArrayList<Object> obj = intObj.getObjs()[i]; for (int j = 0; j < sz; j++) { forwardCache[p++] = obj.get(j); } } } forward(forwardCache, outputObjInspector); countAfterReport = 0; } private void copyOldArray(boolean[] src, boolean[] dest) { for (int i = 0; i < src.length; i++) { dest[i] = src[i]; } } private ArrayList<boolean[]> joinObjectsInnerJoin( ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls, ArrayList<Object> newObj, IntermediateObject intObj, int left, boolean newObjNull) { if (newObjNull) { return resNulls; } Iterator<boolean[]> nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { boolean[] oldNulls = nullsIter.next(); boolean oldObjNull = oldNulls[left]; if (!oldObjNull) { boolean[] newNulls = new boolean[intObj.getCurSize()]; copyOldArray(oldNulls, newNulls); newNulls[oldNulls.length] = false; resNulls.add(newNulls); } } return resNulls; } /** * Implement semi join operator. */ private ArrayList<boolean[]> joinObjectsLeftSemiJoin( ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls, ArrayList<Object> newObj, IntermediateObject intObj, int left, boolean newObjNull) { if (newObjNull) { return resNulls; } Iterator<boolean[]> nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { boolean[] oldNulls = nullsIter.next(); boolean oldObjNull = oldNulls[left]; if (!oldObjNull) { boolean[] newNulls = new boolean[intObj.getCurSize()]; copyOldArray(oldNulls, newNulls); newNulls[oldNulls.length] = false; resNulls.add(newNulls); } } return resNulls; } private ArrayList<boolean[]> joinObjectsLeftOuterJoin( ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls, ArrayList<Object> newObj, IntermediateObject intObj, int left, boolean newObjNull) { // newObj is null if is already null or // if the row corresponding to the left alias does not pass through filter int filterIndex = joinValues.get(order[left]).size(); if(filterIndex < intObj.getObjs()[left].size()) { newObjNull = newObjNull || ((BooleanWritable) (intObj.getObjs()[left].get(filterIndex))).get(); } Iterator<boolean[]> nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { boolean[] oldNulls = nullsIter.next(); boolean oldObjNull = oldNulls[left]; boolean[] newNulls = new boolean[intObj.getCurSize()]; copyOldArray(oldNulls, newNulls); if (oldObjNull) { newNulls[oldNulls.length] = true; } else { newNulls[oldNulls.length] = newObjNull; } resNulls.add(newNulls); } return resNulls; } private ArrayList<boolean[]> joinObjectsRightOuterJoin( ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls, ArrayList<Object> newObj, IntermediateObject intObj, int left, boolean newObjNull, boolean firstRow) { if (newObjNull) { return resNulls; } if (inputNulls.isEmpty() && firstRow) { boolean[] newNulls = new boolean[intObj.getCurSize()]; for (int i = 0; i < intObj.getCurSize() - 1; i++) { newNulls[i] = true; } newNulls[intObj.getCurSize() - 1] = newObjNull; resNulls.add(newNulls); return resNulls; } boolean allOldObjsNull = firstRow; Iterator<boolean[]> nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { boolean[] oldNulls = nullsIter.next(); if (!oldNulls[left]) { allOldObjsNull = false; break; } } // if the row does not pass through filter, all old Objects are null if (((BooleanWritable)newObj.get(newObj.size()-1)).get()) { allOldObjsNull = true; } nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { boolean[] oldNulls = nullsIter.next(); boolean oldObjNull = oldNulls[left] || allOldObjsNull; if (!oldObjNull) { boolean[] newNulls = new boolean[intObj.getCurSize()]; copyOldArray(oldNulls, newNulls); newNulls[oldNulls.length] = newObjNull; resNulls.add(newNulls); } else if (allOldObjsNull) { boolean[] newNulls = new boolean[intObj.getCurSize()]; for (int i = 0; i < intObj.getCurSize() - 1; i++) { newNulls[i] = true; } newNulls[oldNulls.length] = newObjNull; resNulls.add(newNulls); return resNulls; } } return resNulls; } private ArrayList<boolean[]> joinObjectsFullOuterJoin( ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls, ArrayList<Object> newObj, IntermediateObject intObj, int left, boolean newObjNull, boolean firstRow) { if (newObjNull) { Iterator<boolean[]> nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { boolean[] oldNulls = nullsIter.next(); boolean[] newNulls = new boolean[intObj.getCurSize()]; copyOldArray(oldNulls, newNulls); newNulls[oldNulls.length] = newObjNull; resNulls.add(newNulls); } return resNulls; } if (inputNulls.isEmpty() && firstRow) { boolean[] newNulls = new boolean[intObj.getCurSize()]; for (int i = 0; i < intObj.getCurSize() - 1; i++) { newNulls[i] = true; } newNulls[intObj.getCurSize() - 1] = newObjNull; resNulls.add(newNulls); return resNulls; } boolean allOldObjsNull = firstRow; Iterator<boolean[]> nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { boolean[] oldNulls = nullsIter.next(); if (!oldNulls[left]) { allOldObjsNull = false; break; } } // if the row does not pass through filter, all old Objects are null if (((BooleanWritable)newObj.get(newObj.size()-1)).get()) { allOldObjsNull = true; } boolean rhsPreserved = false; nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { boolean[] oldNulls = nullsIter.next(); // old obj is null even if the row corresponding to the left alias // does not pass through filter boolean oldObjNull = oldNulls[left] || ((BooleanWritable) (intObj.getObjs()[left].get(joinValues.get(order[left]).size()))).get() || allOldObjsNull; if (!oldObjNull) { boolean[] newNulls = new boolean[intObj.getCurSize()]; copyOldArray(oldNulls, newNulls); newNulls[oldNulls.length] = newObjNull; resNulls.add(newNulls); } else if (oldObjNull) { boolean[] newNulls = new boolean[intObj.getCurSize()]; copyOldArray(oldNulls, newNulls); newNulls[oldNulls.length] = true; resNulls.add(newNulls); if (allOldObjsNull && !rhsPreserved) { newNulls = new boolean[intObj.getCurSize()]; for (int i = 0; i < oldNulls.length; i++) { newNulls[i] = true; } newNulls[oldNulls.length] = false; resNulls.add(newNulls); rhsPreserved = true; } } } return resNulls; } /* * The new input is added to the list of existing inputs. Each entry in the * array of inputNulls denotes the entries in the intermediate object to be * used. The intermediate object is augmented with the new object, and list of * nulls is changed appropriately. The list will contain all non-nulls for a * inner join. The outer joins are processed appropriately. */ private ArrayList<boolean[]> joinObjects(ArrayList<boolean[]> inputNulls, ArrayList<Object> newObj, IntermediateObject intObj, int joinPos, boolean firstRow) { ArrayList<boolean[]> resNulls = new ArrayList<boolean[]>(); boolean newObjNull = newObj == dummyObj[joinPos] ? true : false; if (joinPos == 0) { if (newObjNull) { return null; } boolean[] nulls = new boolean[1]; nulls[0] = newObjNull; resNulls.add(nulls); return resNulls; } int left = condn[joinPos - 1].getLeft(); int type = condn[joinPos - 1].getType(); // process all nulls for RIGHT and FULL OUTER JOINS if (((type == JoinDesc.RIGHT_OUTER_JOIN) || (type == JoinDesc.FULL_OUTER_JOIN)) && !newObjNull && (inputNulls == null) && firstRow) { boolean[] newNulls = new boolean[intObj.getCurSize()]; for (int i = 0; i < newNulls.length - 1; i++) { newNulls[i] = true; } newNulls[newNulls.length - 1] = false; resNulls.add(newNulls); return resNulls; } if (inputNulls == null) { return null; } if (type == JoinDesc.INNER_JOIN) { return joinObjectsInnerJoin(resNulls, inputNulls, newObj, intObj, left, newObjNull); } else if (type == JoinDesc.LEFT_OUTER_JOIN) { return joinObjectsLeftOuterJoin(resNulls, inputNulls, newObj, intObj, left, newObjNull); } else if (type == JoinDesc.RIGHT_OUTER_JOIN) { return joinObjectsRightOuterJoin(resNulls, inputNulls, newObj, intObj, left, newObjNull, firstRow); } else if (type == JoinDesc.LEFT_SEMI_JOIN) { return joinObjectsLeftSemiJoin(resNulls, inputNulls, newObj, intObj, left, newObjNull); } assert (type == JoinDesc.FULL_OUTER_JOIN); return joinObjectsFullOuterJoin(resNulls, inputNulls, newObj, intObj, left, newObjNull, firstRow); } /* * genObject is a recursive function. For the inputs, a array of bitvectors is * maintained (inputNulls) where each entry denotes whether the element is to * be used or not (whether it is null or not). The size of the bitvector is * same as the number of inputs under consideration currently. When all inputs * are accounted for, the output is forwarded appropriately. */ private void genObject(ArrayList<boolean[]> inputNulls, int aliasNum, IntermediateObject intObj, boolean firstRow) throws HiveException { boolean childFirstRow = firstRow; boolean skipping = false; if (aliasNum < numAliases) { // search for match in the rhs table AbstractRowContainer<ArrayList<Object>> aliasRes = storage.get(order[aliasNum]); for (ArrayList<Object> newObj = aliasRes.first(); newObj != null; newObj = aliasRes .next()) { // check for skipping in case of left semi join if (aliasNum > 0 && condn[aliasNum - 1].getType() == JoinDesc.LEFT_SEMI_JOIN && newObj != dummyObj[aliasNum]) { // successful match skipping = true; } intObj.pushObj(newObj); // execute the actual join algorithm ArrayList<boolean[]> newNulls = joinObjects(inputNulls, newObj, intObj, aliasNum, childFirstRow); // recursively call the join the other rhs tables genObject(newNulls, aliasNum + 1, intObj, firstRow); intObj.popObj(); firstRow = false; // if left-semi-join found a match, skipping the rest of the rows in the // rhs table of the semijoin if (skipping) { break; } } } else { if (inputNulls == null) { return; } Iterator<boolean[]> nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { boolean[] nullsVec = nullsIter.next(); createForwardJoinObject(intObj, nullsVec); } } } /** * Forward a record of join results. * * @throws HiveException */ @Override public void endGroup() throws HiveException { LOG.trace("Join Op: endGroup called: numValues=" + numAliases); checkAndGenObject(); } private void genUniqueJoinObject(int aliasNum, int forwardCachePos) throws HiveException { AbstractRowContainer<ArrayList<Object>> alias = storage.get(order[aliasNum]); for (ArrayList<Object> row = alias.first(); row != null; row = alias.next()) { int sz = joinValues.get(order[aliasNum]).size(); int p = forwardCachePos; for (int j = 0; j < sz; j++) { forwardCache[p++] = row.get(j); } if (aliasNum == numAliases - 1) { forward(forwardCache, outputObjInspector); countAfterReport = 0; } else { genUniqueJoinObject(aliasNum + 1, p); } } } private void genAllOneUniqueJoinObject() throws HiveException { int p = 0; for (int i = 0; i < numAliases; i++) { int sz = joinValues.get(order[i]).size(); ArrayList<Object> obj = storage.get(order[i]).first(); for (int j = 0; j < sz; j++) { forwardCache[p++] = obj.get(j); } } forward(forwardCache, outputObjInspector); countAfterReport = 0; } protected void checkAndGenObject() throws HiveException { if (condn[0].getType() == JoinDesc.UNIQUE_JOIN) { // Check if results need to be emitted. // Results only need to be emitted if there is a non-null entry in a table // that is preserved or if there are no non-null entries boolean preserve = false; // Will be true if there is a non-null entry // in a preserved table boolean hasNulls = false; // Will be true if there are null entries boolean allOne = true; for (int i = 0; i < numAliases; i++) { Byte alias = order[i]; AbstractRowContainer<ArrayList<Object>> alw = storage.get(alias); if (alw.size() != 1) { allOne = false; } if (alw.size() == 0) { alw.add((ArrayList<Object>) dummyObj[i]); hasNulls = true; } else if (condn[i].getPreserved()) { preserve = true; } } if (hasNulls && !preserve) { return; } if (allOne) { LOG.info("calling genAllOneUniqueJoinObject"); genAllOneUniqueJoinObject(); LOG.info("called genAllOneUniqueJoinObject"); } else { LOG.trace("calling genUniqueJoinObject"); genUniqueJoinObject(0, 0); LOG.trace("called genUniqueJoinObject"); } } else { // does any result need to be emitted boolean mayHasMoreThanOne = false; boolean hasEmpty = false; for (int i = 0; i < numAliases; i++) { Byte alias = order[i]; AbstractRowContainer<ArrayList<Object>> alw = storage.get(alias); if (noOuterJoin) { if (alw.size() == 0) { LOG.trace("No data for alias=" + i); return; } else if (alw.size() > 1) { mayHasMoreThanOne = true; } } else { if (alw.size() == 0) { hasEmpty = true; alw.add((ArrayList<Object>) dummyObj[i]); } else if (!hasEmpty && alw.size() == 1) { ArrayList<Object> row = alw.first(); int numValues = joinValues.get(alias).size(); if (row == dummyObj[alias] || (row.size() > numValues && ((BooleanWritable) (row.get(numValues))).get())) { hasEmpty = true; } } else { mayHasMoreThanOne = true; if (!hasEmpty) { int numValues = joinValues.get(alias).size(); for (ArrayList<Object> row = alw.first(); row != null; row = alw.next()) { if (row == dummyObj[alias] || (row.size() > numValues && ((BooleanWritable) (row.get(numValues))).get())) { hasEmpty = true; break; } } } } } } if (!hasEmpty && !mayHasMoreThanOne) { LOG.trace("calling genAllOneUniqueJoinObject"); genAllOneUniqueJoinObject(); LOG.trace("called genAllOneUniqueJoinObject"); } else if (!hasEmpty && !hasLeftSemiJoin) { LOG.trace("calling genUniqueJoinObject"); genUniqueJoinObject(0, 0); LOG.trace("called genUniqueJoinObject"); } else { LOG.trace("calling genObject"); genObject(null, 0, new IntermediateObject(new ArrayList[numAliases], 0), true); LOG.trace("called genObject"); } } } protected void reportProgress() { // Send some status periodically countAfterReport++; if ((countAfterReport % heartbeatInterval) == 0 && (reporter != null)) { reporter.progress(); countAfterReport = 0; } } /** * Returns true if the row does not pass through filters. */ protected static Boolean isFiltered(Object row, List<ExprNodeEvaluator> filters, List<ObjectInspector> ois) throws HiveException { // apply join filters on the row. Boolean ret = false; for (int j = 0; j < filters.size(); j++) { Object condition = filters.get(j).evaluate(row); ret = (Boolean) ((PrimitiveObjectInspector) ois.get(j)).getPrimitiveJavaObject(condition); if (ret == null || !ret) { return true; } } return false; } /** * All done. * */ @Override public void closeOp(boolean abort) throws HiveException { LOG.trace("Join Op close"); for (AbstractRowContainer<ArrayList<Object>> alw : storage.values()) { if (alw != null) { alw.clear(); // clean up the temp files } } storage.clear(); } @Override public String getName() { return "JOIN"; } /** * @return the posToAliasMap */ public Map<Integer, Set<String>> getPosToAliasMap() { return posToAliasMap; } /** * @param posToAliasMap * the posToAliasMap to set */ public void setPosToAliasMap(Map<Integer, Set<String>> posToAliasMap) { this.posToAliasMap = posToAliasMap; } }