/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.drill.exec.physical.impl.common; import java.io.IOException; import java.util.Arrays; import java.util.List; import org.apache.drill.common.expression.ErrorCollector; import org.apache.drill.common.expression.ErrorCollectorImpl; import org.apache.drill.common.expression.LogicalExpression; import org.apache.drill.common.logical.data.NamedExpression; import org.apache.drill.common.types.Types; import org.apache.drill.exec.compile.sig.GeneratorMapping; import org.apache.drill.exec.compile.sig.MappingSet; import org.apache.drill.exec.exception.ClassTransformationException; import org.apache.drill.exec.exception.SchemaChangeException; import org.apache.drill.exec.expr.ClassGenerator; import org.apache.drill.exec.expr.ClassGenerator.HoldingContainer; import org.apache.drill.exec.expr.CodeGenerator; import org.apache.drill.exec.expr.ExpressionTreeMaterializer; import org.apache.drill.exec.expr.TypeHelper; import org.apache.drill.exec.expr.ValueVectorReadExpression; import org.apache.drill.exec.expr.ValueVectorWriteExpression; import org.apache.drill.exec.expr.fn.FunctionGenerationHelper; import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.ops.FragmentContext; import org.apache.drill.exec.physical.impl.join.JoinUtils; import org.apache.drill.exec.planner.physical.HashPrelUtil; import org.apache.drill.exec.record.MaterializedField; import org.apache.drill.exec.record.RecordBatch; import org.apache.drill.exec.record.TypedFieldId; import org.apache.drill.exec.record.VectorAccessible; import org.apache.drill.exec.record.VectorContainer; import org.apache.drill.exec.vector.ValueVector; import com.sun.codemodel.JConditional; import com.sun.codemodel.JExpr; public class ChainedHashTable { static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ChainedHashTable.class); private static final GeneratorMapping KEY_MATCH_BUILD = GeneratorMapping.create("setupInterior" /* setup method */, "isKeyMatchInternalBuild" /* eval method */, null /* reset */, null /* cleanup */); private static final GeneratorMapping KEY_MATCH_PROBE = GeneratorMapping.create("setupInterior" /* setup method */, "isKeyMatchInternalProbe" /* eval method */, null /* reset */, null /* cleanup */); private static final GeneratorMapping GET_HASH_BUILD = GeneratorMapping.create("doSetup" /* setup method */, "getHashBuild" /* eval method */, null /* reset */, null /* cleanup */); private static final GeneratorMapping GET_HASH_PROBE = GeneratorMapping.create("doSetup" /* setup method */, "getHashProbe" /* eval method */, null /* reset */, null /* cleanup */); private static final GeneratorMapping SET_VALUE = GeneratorMapping.create("setupInterior" /* setup method */, "setValue" /* eval method */, null /* reset */, null /* cleanup */); private static final GeneratorMapping OUTPUT_KEYS = GeneratorMapping.create("setupInterior" /* setup method */, "outputRecordKeys" /* eval method */, null /* reset */, null /* cleanup */); // GM for putting constant expression into method "setupInterior" private static final GeneratorMapping SETUP_INTERIOR_CONSTANT = GeneratorMapping.create("setupInterior" /* setup method */, "setupInterior" /* eval method */, null /* reset */, null /* cleanup */); // GM for putting constant expression into method "doSetup" private static final GeneratorMapping DO_SETUP_CONSTANT = GeneratorMapping.create("doSetup" /* setup method */, "doSetup" /* eval method */, null /* reset */, null /* cleanup */); private final MappingSet KeyMatchIncomingBuildMapping = new MappingSet("incomingRowIdx", null, "incomingBuild", null, SETUP_INTERIOR_CONSTANT, KEY_MATCH_BUILD); private final MappingSet KeyMatchIncomingProbeMapping = new MappingSet("incomingRowIdx", null, "incomingProbe", null, SETUP_INTERIOR_CONSTANT, KEY_MATCH_PROBE); private final MappingSet KeyMatchHtableMapping = new MappingSet("htRowIdx", null, "htContainer", null, SETUP_INTERIOR_CONSTANT, KEY_MATCH_BUILD); private final MappingSet KeyMatchHtableProbeMapping = new MappingSet("htRowIdx", null, "htContainer", null, SETUP_INTERIOR_CONSTANT, KEY_MATCH_PROBE); private final MappingSet GetHashIncomingBuildMapping = new MappingSet("incomingRowIdx", null, "incomingBuild", null, DO_SETUP_CONSTANT, GET_HASH_BUILD); private final MappingSet GetHashIncomingProbeMapping = new MappingSet("incomingRowIdx", null, "incomingProbe", null, DO_SETUP_CONSTANT, GET_HASH_PROBE); private final MappingSet SetValueMapping = new MappingSet("incomingRowIdx" /* read index */, "htRowIdx" /* write index */, "incomingBuild" /* read container */, "htContainer" /* write container */, SETUP_INTERIOR_CONSTANT, SET_VALUE); private final MappingSet OutputRecordKeysMapping = new MappingSet("htRowIdx" /* read index */, "outRowIdx" /* write index */, "htContainer" /* read container */, "outgoing" /* write container */, SETUP_INTERIOR_CONSTANT, OUTPUT_KEYS); private HashTableConfig htConfig; private final FragmentContext context; private final BufferAllocator allocator; private final RecordBatch incomingBuild; private final RecordBatch incomingProbe; private final RecordBatch outgoing; public ChainedHashTable(HashTableConfig htConfig, FragmentContext context, BufferAllocator allocator, RecordBatch incomingBuild, RecordBatch incomingProbe, RecordBatch outgoing) { this.htConfig = htConfig; this.context = context; this.allocator = allocator; this.incomingBuild = incomingBuild; this.incomingProbe = incomingProbe; this.outgoing = outgoing; } public HashTable createAndSetupHashTable(TypedFieldId[] outKeyFieldIds) throws ClassTransformationException, IOException, SchemaChangeException { CodeGenerator<HashTable> top = CodeGenerator.get(HashTable.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions()); top.plainJavaCapable(true); // Uncomment out this line to debug the generated code. // This code is called from generated code, so to step into this code, // persist the code generated in HashAggBatch also. // top.saveCodeForDebugging(true); ClassGenerator<HashTable> cg = top.getRoot(); ClassGenerator<HashTable> cgInner = cg.getInnerGenerator("BatchHolder"); LogicalExpression[] keyExprsBuild = new LogicalExpression[htConfig.getKeyExprsBuild().size()]; LogicalExpression[] keyExprsProbe = null; boolean isProbe = (htConfig.getKeyExprsProbe() != null); if (isProbe) { keyExprsProbe = new LogicalExpression[htConfig.getKeyExprsProbe().size()]; } ErrorCollector collector = new ErrorCollectorImpl(); VectorContainer htContainerOrig = new VectorContainer(); // original ht container from which others may be cloned LogicalExpression[] htKeyExprs = new LogicalExpression[htConfig.getKeyExprsBuild().size()]; TypedFieldId[] htKeyFieldIds = new TypedFieldId[htConfig.getKeyExprsBuild().size()]; int i = 0; for (NamedExpression ne : htConfig.getKeyExprsBuild()) { final LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incomingBuild, collector, context.getFunctionRegistry()); if (collector.hasErrors()) { throw new SchemaChangeException("Failure while materializing expression. " + collector.toErrorString()); } if (expr == null) { continue; } keyExprsBuild[i] = expr; i++; } if (isProbe) { i = 0; for (NamedExpression ne : htConfig.getKeyExprsProbe()) { final LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incomingProbe, collector, context.getFunctionRegistry()); if (collector.hasErrors()) { throw new SchemaChangeException("Failure while materializing expression. " + collector.toErrorString()); } if (expr == null) { continue; } keyExprsProbe[i] = expr; i++; } JoinUtils.addLeastRestrictiveCasts(keyExprsProbe, incomingProbe, keyExprsBuild, incomingBuild, context); } i = 0; /* * Once the implicit casts have been added, create the value vectors for the corresponding * type and add it to the hash table's container. * Note: Adding implicit casts may have a minor impact on the memory foot print. For example * if we have a join condition with bigint on the probe side and int on the build side then * after this change we will be allocating a bigint vector in the hashtable instead of an int * vector. */ for (NamedExpression ne : htConfig.getKeyExprsBuild()) { LogicalExpression expr = keyExprsBuild[i]; final MaterializedField outputField = MaterializedField.create(ne.getRef().getAsUnescapedPath(), expr.getMajorType()); @SuppressWarnings("resource") ValueVector vv = TypeHelper.getNewVector(outputField, allocator); htKeyFieldIds[i] = htContainerOrig.add(vv); i++; } // generate code for isKeyMatch(), setValue(), getHash() and outputRecordKeys() setupIsKeyMatchInternal(cgInner, KeyMatchIncomingBuildMapping, KeyMatchHtableMapping, keyExprsBuild, htConfig.getComparators(), htKeyFieldIds); setupIsKeyMatchInternal(cgInner, KeyMatchIncomingProbeMapping, KeyMatchHtableProbeMapping, keyExprsProbe, htConfig.getComparators(), htKeyFieldIds); setupSetValue(cgInner, keyExprsBuild, htKeyFieldIds); if (outgoing != null) { if (outKeyFieldIds.length > htConfig.getKeyExprsBuild().size()) { throw new IllegalArgumentException("Mismatched number of output key fields."); } } setupOutputRecordKeys(cgInner, htKeyFieldIds, outKeyFieldIds); setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingBuildMapping, incomingBuild, keyExprsBuild, false); setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingProbeMapping, incomingProbe, keyExprsProbe, true); HashTable ht = context.getImplementationClass(top); ht.setup(htConfig, context, allocator, incomingBuild, incomingProbe, outgoing, htContainerOrig); return ht; } private void setupIsKeyMatchInternal(ClassGenerator<HashTable> cg, MappingSet incomingMapping, MappingSet htableMapping, LogicalExpression[] keyExprs, List<Comparator> comparators, TypedFieldId[] htKeyFieldIds) throws SchemaChangeException { cg.setMappingSet(incomingMapping); if (keyExprs == null || keyExprs.length == 0) { cg.getEvalBlock()._return(JExpr.FALSE); return; } for (int i=0; i<keyExprs.length; i++) { final LogicalExpression expr = keyExprs[i]; cg.setMappingSet(incomingMapping); HoldingContainer left = cg.addExpr(expr, ClassGenerator.BlkCreateMode.FALSE); cg.setMappingSet(htableMapping); ValueVectorReadExpression vvrExpr = new ValueVectorReadExpression(htKeyFieldIds[i]); HoldingContainer right = cg.addExpr(vvrExpr, ClassGenerator.BlkCreateMode.FALSE); JConditional jc; // codegen for nullable columns if nulls are not equal if (comparators.get(i) == Comparator.EQUALS && left.isOptional() && right.isOptional()) { jc = cg.getEvalBlock()._if(left.getIsSet().eq(JExpr.lit(0)). cand(right.getIsSet().eq(JExpr.lit(0)))); jc._then()._return(JExpr.FALSE); } final LogicalExpression f = FunctionGenerationHelper .getOrderingComparatorNullsHigh(left, right, context.getFunctionRegistry()); HoldingContainer out = cg.addExpr(f, ClassGenerator.BlkCreateMode.FALSE); // check if two values are not equal (comparator result != 0) jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0))); jc._then()._return(JExpr.FALSE); } // All key expressions compared equal, so return TRUE cg.getEvalBlock()._return(JExpr.TRUE); } private void setupSetValue(ClassGenerator<HashTable> cg, LogicalExpression[] keyExprs, TypedFieldId[] htKeyFieldIds) throws SchemaChangeException { cg.setMappingSet(SetValueMapping); int i = 0; for (LogicalExpression expr : keyExprs) { boolean useSetSafe = !Types.isFixedWidthType(expr.getMajorType()) || Types.isRepeated(expr.getMajorType()); ValueVectorWriteExpression vvwExpr = new ValueVectorWriteExpression(htKeyFieldIds[i++], expr, useSetSafe); cg.addExpr(vvwExpr, ClassGenerator.BlkCreateMode.FALSE); // this will write to the htContainer at htRowIdx } } private void setupOutputRecordKeys(ClassGenerator<HashTable> cg, TypedFieldId[] htKeyFieldIds, TypedFieldId[] outKeyFieldIds) { cg.setMappingSet(OutputRecordKeysMapping); if (outKeyFieldIds != null) { for (int i = 0; i < outKeyFieldIds.length; i++) { ValueVectorReadExpression vvrExpr = new ValueVectorReadExpression(htKeyFieldIds[i]); boolean useSetSafe = !Types.isFixedWidthType(vvrExpr.getMajorType()) || Types.isRepeated(vvrExpr.getMajorType()); ValueVectorWriteExpression vvwExpr = new ValueVectorWriteExpression(outKeyFieldIds[i], vvrExpr, useSetSafe); cg.addExpr(vvwExpr, ClassGenerator.BlkCreateMode.TRUE); } } } private void setupGetHash(ClassGenerator<HashTable> cg, MappingSet incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs, boolean isProbe) throws SchemaChangeException { cg.setMappingSet(incomingMapping); if (keyExprs == null || keyExprs.length == 0) { cg.getEvalBlock()._return(JExpr.lit(0)); return; } /* * We use the same logic to generate run time code for the hash function both for hash join and hash * aggregate. For join we need to hash everything as double (both for distribution and for comparison) but * for aggregation we can avoid the penalty of casting to double */ LogicalExpression hashExpression = HashPrelUtil.getHashExpression(Arrays.asList(keyExprs), incomingProbe != null ? true : false); final LogicalExpression materializedExpr = ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression, batch, context.getFunctionRegistry()); HoldingContainer hash = cg.addExpr(materializedExpr); cg.getEvalBlock()._return(hash.getValue()); } }