/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.exec;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.persistence.AbstractMapJoinKey;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinDoubleKeys;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectKey;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinSingleKey;
import org.apache.hadoop.hive.ql.exec.persistence.RowContainer;
import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Static helper routines shared by join operators: initializing key/value
 * expression evaluators and their object inspectors, materializing join
 * keys/values from input rows as standard (copyable) objects, evaluating
 * join filters, and building the spill-table descriptors and row containers
 * used when a join side must spill to disk.
 *
 * <p>Throughout this class the byte-valued table alias equal to
 * {@code posBigTableAlias} identifies the "big" (streamed) table, which is
 * skipped when building per-alias metadata for the small (hashed) tables.
 */
public class JoinUtil {

  /**
   * Initializes every evaluator in {@code exprEntries} against the input
   * object inspector of its alias and collects the resulting field
   * inspectors, keyed by alias.  The big-table alias is skipped entirely
   * (it gets no entry in the result).
   *
   * @param exprEntries       per-alias expression evaluators to initialize
   * @param inputObjInspector per-alias input object inspectors, indexed by alias
   * @param posBigTableAlias  alias of the big table to skip
   * @return per-alias lists of field object inspectors (big table excluded)
   * @throws HiveException if an evaluator fails to initialize
   */
  public static HashMap<Byte, List<ObjectInspector>> getObjectInspectorsFromEvaluators(
      Map<Byte, List<ExprNodeEvaluator>> exprEntries,
      ObjectInspector[] inputObjInspector,
      int posBigTableAlias) throws HiveException {
    HashMap<Byte, List<ObjectInspector>> result =
        new HashMap<Byte, List<ObjectInspector>>();
    for (Entry<Byte, List<ExprNodeEvaluator>> exprEntry : exprEntries.entrySet()) {
      Byte alias = exprEntry.getKey();
      // Skip the big (streamed) table; only small tables need these inspectors.
      if (alias == (byte) posBigTableAlias) {
        continue;
      }
      List<ExprNodeEvaluator> exprList = exprEntry.getValue();
      ArrayList<ObjectInspector> fieldOIList =
          new ArrayList<ObjectInspector>(exprList.size());
      for (int i = 0; i < exprList.size(); i++) {
        fieldOIList.add(exprList.get(i).initialize(inputObjInspector[alias]));
      }
      result.put(alias, fieldOIList);
    }
    return result;
  }

  /**
   * Converts each alias's object inspectors into standard WRITABLE object
   * inspectors, so that values copied via
   * {@link ObjectInspectorUtils#copyToStandardObject} can be inspected.
   * The big-table alias is skipped.
   *
   * @param aliasToObjectInspectors per-alias source inspectors
   * @param posBigTableAlias        alias of the big table to skip
   * @return per-alias standard (WRITABLE) object inspectors (big table excluded)
   */
  public static HashMap<Byte, List<ObjectInspector>> getStandardObjectInspectors(
      Map<Byte, List<ObjectInspector>> aliasToObjectInspectors,
      int posBigTableAlias) {
    HashMap<Byte, List<ObjectInspector>> result =
        new HashMap<Byte, List<ObjectInspector>>();
    for (Entry<Byte, List<ObjectInspector>> oiEntry : aliasToObjectInspectors.entrySet()) {
      Byte alias = oiEntry.getKey();
      // Skip the big (streamed) table.
      if (alias == (byte) posBigTableAlias) {
        continue;
      }
      List<ObjectInspector> oiList = oiEntry.getValue();
      ArrayList<ObjectInspector> fieldOIList =
          new ArrayList<ObjectInspector>(oiList.size());
      for (int i = 0; i < oiList.size(); i++) {
        fieldOIList.add(ObjectInspectorUtils.getStandardObjectInspector(
            oiList.get(i), ObjectInspectorCopyOption.WRITABLE));
      }
      result.put(alias, fieldOIList);
    }
    return result;
  }

  /**
   * Builds expression evaluators for every alias's descriptors and stores
   * them in {@code outMap}, keyed by the reordered alias
   * {@code order[inputKey]}.  For the big-table alias a list of {@code null}
   * placeholders (one per descriptor) is stored instead of real evaluators.
   *
   * @param outMap           destination map, populated by this call
   * @param inputMap         per-alias expression descriptors
   * @param order            maps an input alias to its join-order position
   * @param posBigTableAlias alias of the big table (gets null placeholders)
   * @return total number of descriptors seen across all aliases
   */
  public static int populateJoinKeyValue(Map<Byte, List<ExprNodeEvaluator>> outMap,
      Map<Byte, List<ExprNodeDesc>> inputMap,
      Byte[] order,
      int posBigTableAlias) {
    int total = 0;
    for (Map.Entry<Byte, List<ExprNodeDesc>> e : inputMap.entrySet()) {
      Byte key = order[e.getKey()];
      List<ExprNodeDesc> expr = e.getValue();
      int sz = expr.size();
      total += sz;
      List<ExprNodeEvaluator> valueFields = new ArrayList<ExprNodeEvaluator>();
      // The alias check does not depend on the column index, so it is
      // hoisted out of the per-column loop (the original re-tested it
      // on every iteration).
      boolean isBigTable = key == (byte) posBigTableAlias;
      for (int j = 0; j < sz; j++) {
        if (isBigTable) {
          valueFields.add(null);
        } else {
          valueFields.add(ExprNodeEvaluatorFactory.get(expr.get(j)));
        }
      }
      outMap.put(key, valueFields);
    }
    return total;
  }

  /**
   * Return the key as a standard object. StandardObject can be inspected by a
   * standard ObjectInspector.
   *
   * @throws HiveException if a key expression fails to evaluate
   */
  public static ArrayList<Object> computeKeys(Object row,
      List<ExprNodeEvaluator> keyFields,
      List<ObjectInspector> keyFieldsOI) throws HiveException {
    // Evaluate each key expression and copy the result into a standard
    // WRITABLE object so it can outlive the current input row.
    ArrayList<Object> nr = new ArrayList<Object>(keyFields.size());
    for (int i = 0; i < keyFields.size(); i++) {
      nr.add(ObjectInspectorUtils.copyToStandardObject(
          keyFields.get(i).evaluate(row),
          keyFieldsOI.get(i),
          ObjectInspectorCopyOption.WRITABLE));
    }
    return nr;
  }

  /**
   * Return the key as a standard object. StandardObject can be inspected by a
   * standard ObjectInspector.
   *
   * <p>Uses the specialized single/double key wrappers for 1 and 2 key
   * columns, and the generic array-backed wrapper otherwise.
   *
   * @throws HiveException if a key expression fails to evaluate
   */
  public static AbstractMapJoinKey computeMapJoinKeys(Object row,
      List<ExprNodeEvaluator> keyFields,
      List<ObjectInspector> keyFieldsOI) throws HiveException {
    int size = keyFields.size();
    if (size == 1) {
      Object obj = ObjectInspectorUtils.copyToStandardObject(
          keyFields.get(0).evaluate(row),
          keyFieldsOI.get(0),
          ObjectInspectorCopyOption.WRITABLE);
      return new MapJoinSingleKey(obj);
    } else if (size == 2) {
      Object obj1 = ObjectInspectorUtils.copyToStandardObject(
          keyFields.get(0).evaluate(row),
          keyFieldsOI.get(0),
          ObjectInspectorCopyOption.WRITABLE);
      Object obj2 = ObjectInspectorUtils.copyToStandardObject(
          keyFields.get(1).evaluate(row),
          keyFieldsOI.get(1),
          ObjectInspectorCopyOption.WRITABLE);
      return new MapJoinDoubleKeys(obj1, obj2);
    } else {
      // General case: copy every key column into an array-backed key.
      Object[] nr = new Object[keyFields.size()];
      for (int i = 0; i < keyFields.size(); i++) {
        nr[i] = ObjectInspectorUtils.copyToStandardObject(
            keyFields.get(i).evaluate(row),
            keyFieldsOI.get(i),
            ObjectInspectorCopyOption.WRITABLE);
      }
      return new MapJoinObjectKey(nr);
    }
  }

  /**
   * Return the value as a standard object. StandardObject can be inspected by a
   * standard ObjectInspector.
   *
   * <p>For outer joins ({@code noOuterJoin == false}) the returned array has
   * one extra trailing slot holding a {@link BooleanWritable} that records
   * whether the row was rejected by the join filters.
   *
   * @throws HiveException if a value or filter expression fails to evaluate
   */
  public static Object[] computeMapJoinValues(Object row,
      List<ExprNodeEvaluator> valueFields,
      List<ObjectInspector> valueFieldsOI,
      List<ExprNodeEvaluator> filters,
      List<ObjectInspector> filtersOI,
      boolean noOuterJoin) throws HiveException {
    Object[] nr;
    if (!noOuterJoin) {
      nr = new Object[valueFields.size() + 1];
      // Last slot: whether the row is filtered out (needed for outer joins).
      nr[valueFields.size()] =
          new BooleanWritable(isFiltered(row, filters, filtersOI));
    } else {
      nr = new Object[valueFields.size()];
    }
    for (int i = 0; i < valueFields.size(); i++) {
      nr[i] = ObjectInspectorUtils.copyToStandardObject(
          valueFields.get(i).evaluate(row),
          valueFieldsOI.get(i),
          ObjectInspectorCopyOption.WRITABLE);
    }
    return nr;
  }

  /**
   * Return the value as a standard object. StandardObject can be inspected by a
   * standard ObjectInspector.
   *
   * <p>For outer joins ({@code noOuterJoin == false}) a trailing
   * {@link BooleanWritable} is appended recording whether the row was
   * rejected by the join filters.
   *
   * @throws HiveException if a value or filter expression fails to evaluate
   */
  public static ArrayList<Object> computeValues(Object row,
      List<ExprNodeEvaluator> valueFields,
      List<ObjectInspector> valueFieldsOI,
      List<ExprNodeEvaluator> filters,
      List<ObjectInspector> filtersOI,
      boolean noOuterJoin) throws HiveException {
    ArrayList<Object> nr = new ArrayList<Object>(valueFields.size());
    for (int i = 0; i < valueFields.size(); i++) {
      nr.add(ObjectInspectorUtils.copyToStandardObject(
          valueFields.get(i).evaluate(row),
          valueFieldsOI.get(i),
          ObjectInspectorCopyOption.WRITABLE));
    }
    if (!noOuterJoin) {
      // Append whether the row is filtered out (needed for outer joins).
      nr.add(new BooleanWritable(isFiltered(row, filters, filtersOI)));
    }
    return nr;
  }

  /**
   * Returns true if the row does not pass through filters.
   *
   * <p>A filter whose result is {@code null} counts as failing, matching
   * SQL three-valued logic where an unknown predicate rejects the row.
   *
   * @throws HiveException if a filter expression fails to evaluate
   */
  protected static Boolean isFiltered(Object row,
      List<ExprNodeEvaluator> filters,
      List<ObjectInspector> ois) throws HiveException {
    // Apply the join filters on the row; short-circuit on the first failure.
    Boolean ret = false;
    for (int j = 0; j < filters.size(); j++) {
      Object condition = filters.get(j).evaluate(row);
      ret = (Boolean) ((PrimitiveObjectInspector) ois.get(j))
          .getPrimitiveJavaObject(condition);
      if (ret == null || !ret) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns the spill-table descriptor for {@code alias}, lazily building
   * all descriptors from {@code conf} when the supplied map is null or empty.
   *
   * <p>NOTE(review): the rebuilt map is assigned only to the local parameter,
   * so the caller's reference is never updated and the descriptors are
   * recomputed on every call that passes a null/empty map — confirm this is
   * intentional before changing it.
   */
  public static TableDesc getSpillTableDesc(Byte alias,
      Map<Byte, TableDesc> spillTableDesc,
      JoinDesc conf,
      boolean noOuterJoin) {
    if (spillTableDesc == null || spillTableDesc.size() == 0) {
      spillTableDesc = initSpillTables(conf, noOuterJoin);
    }
    return spillTableDesc.get(alias);
  }

  /**
   * Returns the supplied spill-table descriptor map, building it from
   * {@code conf} when the supplied map is null.
   */
  public static Map<Byte, TableDesc> getSpillTableDesc(
      Map<Byte, TableDesc> spillTableDesc,
      JoinDesc conf,
      boolean noOuterJoin) {
    if (spillTableDesc == null) {
      spillTableDesc = initSpillTables(conf, noOuterJoin);
    }
    return spillTableDesc;
  }

  /**
   * Instantiates and initializes the SerDe for {@code alias}'s spill table.
   *
   * @return the initialized SerDe, or {@code null} when the alias has no
   *         spill-table descriptor or the SerDe fails to initialize
   *         (callers treat null as "no spilling possible")
   */
  public static SerDe getSpillSerDe(byte alias,
      Map<Byte, TableDesc> spillTableDesc,
      JoinDesc conf,
      boolean noOuterJoin) {
    TableDesc desc = getSpillTableDesc(alias, spillTableDesc, conf, noOuterJoin);
    if (desc == null) {
      return null;
    }
    SerDe sd = (SerDe) ReflectionUtils.newInstance(desc.getDeserializerClass(), null);
    try {
      sd.initialize(null, desc.getProperties());
    } catch (SerDeException e) {
      // NOTE(review): failure is reduced to a null return that callers
      // (e.g. getRowContainer) interpret as "no spill SerDe available";
      // printStackTrace is kept to preserve the existing diagnostics, but
      // a proper logger would be preferable.
      e.printStackTrace();
      return null;
    }
    return sd;
  }

  /**
   * Builds a LazyBinary spill-table descriptor for every alias that has at
   * least one value column.  Column names are synthetic
   * ({@code <tag>_VALUE_<k>}); for outer joins an extra trailing boolean
   * column named "filtered" records the filter result.
   *
   * @return map from alias tag to its spill-table descriptor
   */
  public static Map<Byte, TableDesc> initSpillTables(JoinDesc conf,
      boolean noOuterJoin) {
    Map<Byte, List<ExprNodeDesc>> exprs = conf.getExprs();
    Map<Byte, TableDesc> spillTableDesc =
        new HashMap<Byte, TableDesc>(exprs.size());
    for (int tag = 0; tag < exprs.size(); tag++) {
      List<ExprNodeDesc> valueCols = exprs.get((byte) tag);
      int columnSize = valueCols.size();
      StringBuilder colNames = new StringBuilder();
      StringBuilder colTypes = new StringBuilder();
      if (columnSize <= 0) {
        // No value columns for this alias: nothing to spill.
        continue;
      }
      for (int k = 0; k < columnSize; k++) {
        // Any name works here; these columns are only used internally.
        String newColName = tag + "_VALUE_" + k;
        colNames.append(newColName);
        colNames.append(',');
        colTypes.append(valueCols.get(k).getTypeString());
        colTypes.append(',');
      }
      if (!noOuterJoin) {
        // Outer joins carry an extra boolean "filtered" flag per row.
        colNames.append("filtered");
        colNames.append(',');
        colTypes.append(TypeInfoFactory.booleanTypeInfo.getTypeName());
        colTypes.append(',');
      }
      // Remove the trailing ','.
      colNames.setLength(colNames.length() - 1);
      colTypes.setLength(colTypes.length() - 1);
      TableDesc tblDesc = new TableDesc(LazyBinarySerDe.class,
          SequenceFileInputFormat.class,
          HiveSequenceFileOutputFormat.class,
          Utilities.makeProperties(
              org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT,
              "" + Utilities.ctrlaCode,
              org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS,
              colNames.toString(),
              org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES,
              colTypes.toString()));
      spillTableDesc.put((byte) tag, tblDesc);
    }
    return spillTableDesc;
  }

  /**
   * Creates a {@link RowContainer} for {@code alias}, wired with its spill
   * SerDe and a struct object inspector built from the spill table's column
   * names and the supplied field inspectors.
   *
   * <p>When no spill SerDe is available the container size is forced to -1
   * (the container's own convention for that case — confirm against
   * {@link RowContainer}).
   *
   * @throws HiveException if the container cannot be constructed
   */
  public static RowContainer getRowContainer(Configuration hconf,
      List<ObjectInspector> structFieldObjectInspectors,
      Byte alias,
      int containerSize,
      Map<Byte, TableDesc> spillTableDesc,
      JoinDesc conf,
      boolean noOuterJoin) throws HiveException {
    TableDesc tblDesc =
        JoinUtil.getSpillTableDesc(alias, spillTableDesc, conf, noOuterJoin);
    SerDe serde = JoinUtil.getSpillSerDe(alias, spillTableDesc, conf, noOuterJoin);
    if (serde == null) {
      containerSize = -1;
    }
    RowContainer rc = new RowContainer(containerSize, hconf);
    StructObjectInspector rcOI = null;
    if (tblDesc != null) {
      // Arbitrary column names used internally for serializing to the spill
      // table.
      List<String> colNames = Utilities.getColumnNames(tblDesc.getProperties());
      // Object inspector for serializing input tuples.
      rcOI = ObjectInspectorFactory.getStandardStructObjectInspector(colNames,
          structFieldObjectInspectors);
    }
    rc.setSerDe(serde, rcOI);
    rc.setTableDesc(tblDesc);
    return rc;
  }
}