/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.impl.join;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import com.google.common.collect.Lists;
import org.apache.drill.common.expression.FieldReference;
import org.apache.drill.common.logical.data.JoinCondition;
import org.apache.drill.common.logical.data.NamedExpression;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.compile.sig.GeneratorMapping;
import org.apache.drill.exec.compile.sig.MappingSet;
import org.apache.drill.exec.exception.ClassTransformationException;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.expr.ClassGenerator;
import org.apache.drill.exec.expr.CodeGenerator;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.ops.MetricDef;
import org.apache.drill.exec.physical.config.HashJoinPOP;
import org.apache.drill.exec.physical.impl.common.ChainedHashTable;
import org.apache.drill.exec.physical.impl.common.HashTable;
import org.apache.drill.exec.physical.impl.common.HashTableConfig;
import org.apache.drill.exec.physical.impl.common.HashTableStats;
import org.apache.drill.exec.physical.impl.common.IndexPointer;
import org.apache.drill.exec.physical.impl.common.Comparator;
import org.apache.drill.exec.physical.impl.sort.RecordBatchData;
import org.apache.drill.exec.record.AbstractRecordBatch;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
import org.apache.drill.exec.record.ExpandableHyperContainer;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.TypedFieldId;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.complex.AbstractContainerVector;
import org.apache.calcite.rel.core.JoinRelType;

import com.sun.codemodel.JExpr;
import com.sun.codemodel.JExpression;
import com.sun.codemodel.JVar;

public class HashJoinBatch extends AbstractRecordBatch<HashJoinPOP> {
  public static final long ALLOCATOR_INITIAL_RESERVATION = 1 * 1024 * 1024;
  public static final long ALLOCATOR_MAX_RESERVATION = 20L * 1000 * 1000 * 1000;

  // Probe side record batch
  private final RecordBatch left;

  // Build side record batch
  private final RecordBatch right;

  // Join type: INNER, LEFT, RIGHT or FULL
  private final JoinRelType joinType;

  // Join conditions
  private final List<JoinCondition> conditions;

  private final List<Comparator> comparators;

  // Runtime generated class implementing HashJoinProbe interface
  private HashJoinProbe hashJoinProbe = null;

  /* Helper class
   * Maintains linked list of build side records with the same key
   * Keeps information about which build records have a corresponding
   * matching key in the probe side (for outer, right joins)
   */
  private HashJoinHelper hjHelper = null;

  // Underlying hashtable used by the hash join
  private HashTable hashTable = null;

  /* Hyper container to store all build side record batches.
   * Records are retrieved from this container when there is a matching record
   * on the probe side
   */
  private ExpandableHyperContainer hyperContainer;

  // Number of records in the output container
  private int outputRecords;

  // Current batch index on the build side
  private int buildBatchIndex = 0;

  // Schema of the build side
  private BatchSchema rightSchema = null;

  // Generator mapping for the build side : scalar
  private static final GeneratorMapping PROJECT_BUILD =
      GeneratorMapping.create("doSetup" /* setup method */, "projectBuildRecord" /* eval method */,
          null /* reset */, null /* cleanup */);
  // Generator mapping for the build side : constant
  private static final GeneratorMapping PROJECT_BUILD_CONSTANT =
      GeneratorMapping.create("doSetup" /* setup method */, "doSetup" /* eval method */,
          null /* reset */, null /* cleanup */);
  // Generator mapping for the probe side : scalar
  private static final GeneratorMapping PROJECT_PROBE =
      GeneratorMapping.create("doSetup" /* setup method */, "projectProbeRecord" /* eval method */,
          null /* reset */, null /* cleanup */);
  // Generator mapping for the probe side : constant
  private static final GeneratorMapping PROJECT_PROBE_CONSTANT =
      GeneratorMapping.create("doSetup" /* setup method */, "doSetup" /* eval method */,
          null /* reset */, null /* cleanup */);

  // Mapping set for the build side
  private final MappingSet projectBuildMapping =
      new MappingSet("buildIndex" /* read index */, "outIndex" /* write index */,
          "buildBatch" /* read container */, "outgoing" /* write container */,
          PROJECT_BUILD_CONSTANT, PROJECT_BUILD);

  // Mapping set for the probe side
  private final MappingSet projectProbeMapping =
      new MappingSet("probeIndex" /* read index */, "outIndex" /* write index */,
          "probeBatch" /* read container */, "outgoing" /* write container */,
          PROJECT_PROBE_CONSTANT, PROJECT_PROBE);

  // indicates if we have previously returned an output batch
  boolean firstOutputBatch = true;

  IterOutcome leftUpstream = IterOutcome.NONE;
  IterOutcome rightUpstream = IterOutcome.NONE;

  private final HashTableStats htStats = new HashTableStats();

  public enum Metric implements MetricDef {
    NUM_BUCKETS,
    NUM_ENTRIES,
    NUM_RESIZING,
    RESIZING_TIME;  // duplicated for hash agg

    @Override
    public int metricId() {
      return ordinal();
    }
  }
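  /*
   * Execution overview: buildSchema() pulls one batch from each side to learn
   * the schemas. On the first call to innerNext(), executeBuildPhase() drains
   * the build (right) side into the hash table and the hyper container; after
   * that, probeAndProject() streams the probe (left) side against the table
   * until all output has been produced.
   */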
  @Override
  public int getRecordCount() {
    return outputRecords;
  }

  @Override
  protected void buildSchema() throws SchemaChangeException {
    leftUpstream = next(left);
    rightUpstream = next(right);

    if (leftUpstream == IterOutcome.STOP || rightUpstream == IterOutcome.STOP) {
      state = BatchState.STOP;
      return;
    }

    if (leftUpstream == IterOutcome.OUT_OF_MEMORY || rightUpstream == IterOutcome.OUT_OF_MEMORY) {
      state = BatchState.OUT_OF_MEMORY;
      return;
    }

    // Initialize the hash join helper context
    hjHelper = new HashJoinHelper(context, oContext.getAllocator());
    try {
      rightSchema = right.getSchema();
      final VectorContainer vectors = new VectorContainer(oContext);
      for (final VectorWrapper<?> w : right) {
        vectors.addOrGet(w.getField());
      }
      vectors.buildSchema(SelectionVectorMode.NONE);
      vectors.setRecordCount(0);
      hyperContainer = new ExpandableHyperContainer(vectors);
      hjHelper.addNewBatch(0);
      buildBatchIndex++;

      setupHashTable();
      hashJoinProbe = setupHashJoinProbe();

      // Build the container schema and set the counts
      for (final VectorWrapper<?> w : container) {
        w.getValueVector().allocateNew();
      }
      container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      container.setRecordCount(outputRecords);
    } catch (IOException | ClassTransformationException e) {
      throw new SchemaChangeException(e);
    }
  }
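  /*
   * Note: buildSchema() seeds the hyper container with an empty
   * VectorContainer that mirrors the build-side schema, so the operator can
   * expose a fully-typed output schema downstream before any build rows have
   * actually been consumed.
   */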
  @Override
  public IterOutcome innerNext() {
    try {
      /* If we are here for the first time, execute the build phase of the
       * hash join and set up the runtime generated class for the probe side
       */
      if (state == BatchState.FIRST) {
        // Build the hash table, using the build side record batches.
        executeBuildPhase();
        // IterOutcome next = next(HashJoinHelper.LEFT_INPUT, left);
        hashJoinProbe.setupHashJoinProbe(context, hyperContainer, left, left.getRecordCount(),
            this, hashTable, hjHelper, joinType);

        // Update the hash table related stats for the operator
        updateStats(this.hashTable);
      }

      // Store the number of records projected
      if (!hashTable.isEmpty() || joinType != JoinRelType.INNER) {
        // Allocate the memory for the vectors in the output container
        allocateVectors();

        outputRecords = hashJoinProbe.probeAndProject();

        /* We are here because of one of the following:
         * 1. We completed processing all the records and we are done.
         * 2. We've filled up the outgoing batch to the maximum and need to return upstream.
         * Either way, build the output container's schema and return.
         */
        if (outputRecords > 0 || state == BatchState.FIRST) {
          if (state == BatchState.FIRST) {
            state = BatchState.NOT_FIRST;
          }

          for (final VectorWrapper<?> v : container) {
            v.getValueVector().getMutator().setValueCount(outputRecords);
          }

          return IterOutcome.OK;
        }
      } else {
        // Our build side is empty; we won't have any matches, so drain and clear the probe side
        if (leftUpstream == IterOutcome.OK_NEW_SCHEMA || leftUpstream == IterOutcome.OK) {
          for (final VectorWrapper<?> wrapper : left) {
            wrapper.getValueVector().clear();
          }
          left.kill(true);
          leftUpstream = next(HashJoinHelper.LEFT_INPUT, left);
          while (leftUpstream == IterOutcome.OK_NEW_SCHEMA || leftUpstream == IterOutcome.OK) {
            for (final VectorWrapper<?> wrapper : left) {
              wrapper.getValueVector().clear();
            }
            leftUpstream = next(HashJoinHelper.LEFT_INPUT, left);
          }
        }
      }

      // No more output records, clean up and return
      state = BatchState.DONE;
//      if (first) {
//        return IterOutcome.OK_NEW_SCHEMA;
//      }
      return IterOutcome.NONE;
    } catch (ClassTransformationException | SchemaChangeException | IOException e) {
      context.fail(e);
      killIncoming(false);
      return IterOutcome.STOP;
    }
  }

  public void setupHashTable() throws IOException, SchemaChangeException, ClassTransformationException {
    // Setup the hash table configuration object
    int conditionsSize = conditions.size();
    final List<NamedExpression> rightExpr = new ArrayList<>(conditionsSize);
    List<NamedExpression> leftExpr = new ArrayList<>(conditionsSize);

    // Create named expressions from the conditions
    for (int i = 0; i < conditionsSize; i++) {
      rightExpr.add(new NamedExpression(conditions.get(i).getRight(), new FieldReference("build_side_" + i)));
      leftExpr.add(new NamedExpression(conditions.get(i).getLeft(), new FieldReference("probe_side_" + i)));
    }

    // Set the left named expressions to null if the probe batch is empty.
    if (leftUpstream != IterOutcome.OK_NEW_SCHEMA && leftUpstream != IterOutcome.OK) {
      leftExpr = null;
    } else {
      if (left.getSchema().getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
        final String errorMsg = new StringBuilder()
            .append("Hash join does not support probe batch with selection vectors. ")
            .append("Probe batch has selection mode = ")
            .append(left.getSchema().getSelectionVectorMode())
            .toString();
        throw new SchemaChangeException(errorMsg);
      }
    }

    final HashTableConfig htConfig =
        new HashTableConfig((int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE),
            HashTable.DEFAULT_LOAD_FACTOR, rightExpr, leftExpr, comparators);

    // Create the chained hash table
    final ChainedHashTable ht =
        new ChainedHashTable(htConfig, context, oContext.getAllocator(), this.right, this.left, null);
    hashTable = ht.createAndSetupHashTable(null);
  }
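  /*
   * For illustration (hypothetical condition, not part of this class): given
   * a join on l.key = r.key, the loop in setupHashTable() produces the named
   * expressions build_side_0 -> r.key and probe_side_0 -> l.key; these become
   * the key columns the hash table hashes and compares during build and probe.
   */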
  public void executeBuildPhase() throws SchemaChangeException, ClassTransformationException, IOException {
    // Setup the underlying hash table

    // skip first batch if count is zero, as it may be an empty schema batch
    if (right.getRecordCount() == 0) {
      for (final VectorWrapper<?> w : right) {
        w.clear();
      }
      rightUpstream = next(right);
    }

    boolean moreData = true;

    while (moreData) {
      switch (rightUpstream) {
      case OUT_OF_MEMORY:
      case NONE:
      case NOT_YET:
      case STOP:
        moreData = false;
        continue;

      case OK_NEW_SCHEMA:
        if (rightSchema == null) {
          rightSchema = right.getSchema();

          if (rightSchema.getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
            final String errorMsg = new StringBuilder()
                .append("Hash join does not support build batch with selection vectors. ")
                .append("Build batch has selection mode = ")
                .append(rightSchema.getSelectionVectorMode())
                .toString();
            throw new SchemaChangeException(errorMsg);
          }
          setupHashTable();
        } else {
          if (!rightSchema.equals(right.getSchema())) {
            throw SchemaChangeException.schemaChanged(
                "Hash join does not support schema changes in build side.",
                rightSchema, right.getSchema());
          }
          hashTable.updateBatches();
        }
        // Fall through
      case OK:
        final int currentRecordCount = right.getRecordCount();

        /* For every new build batch, we store some state in the helper context
         * Add new state to the helper context
         */
        hjHelper.addNewBatch(currentRecordCount);

        // Holder contains the global index where the key is hashed into using the hash table
        final IndexPointer htIndex = new IndexPointer();

        // For every record in the build batch, hash the key columns
        for (int i = 0; i < currentRecordCount; i++) {
          hashTable.put(i, htIndex, 1 /* retry count */);

          /* Use the global index returned by the hash table to store
           * the current record index and batch index. This will be used
           * later when we probe and find a match.
           */
          hjHelper.setCurrentIndex(htIndex.value, buildBatchIndex, i);
        }

        /* Completed hashing all records in this batch. Transfer the batch
         * to the hyper vector container. Will be used when we want to retrieve
         * records that have matching keys on the probe side.
         */
        final RecordBatchData nextBatch = new RecordBatchData(right, oContext.getAllocator());
        boolean success = false;
        try {
          if (hyperContainer == null) {
            hyperContainer = new ExpandableHyperContainer(nextBatch.getContainer());
          } else {
            hyperContainer.addBatch(nextBatch.getContainer());
          }

          // completed processing a batch, increment batch index
          buildBatchIndex++;
          success = true;
        } finally {
          if (!success) {
            nextBatch.clear();
          }
        }
        break;
      }

      // Get the next record batch
      rightUpstream = next(HashJoinHelper.RIGHT_INPUT, right);
    }
  }
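  /*
   * Sketch of the bookkeeping above: for a key hashed to global index g,
   * hjHelper.setCurrentIndex(g, buildBatchIndex, i) remembers which build
   * batch and which row within it hold the key. Judging by the decode in the
   * generated code below (buildIndex >>> 16 and buildIndex & 0xFFFF), the
   * helper hands back a composite index whose upper 16 bits are the batch
   * index and lower 16 bits the record index, e.g. batch 2, record 5 ->
   * (2 << 16) | 5.
   */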
  public HashJoinProbe setupHashJoinProbe() throws ClassTransformationException, IOException {
    final CodeGenerator<HashJoinProbe> cg = CodeGenerator.get(HashJoinProbe.TEMPLATE_DEFINITION,
        context.getFunctionRegistry(), context.getOptions());
    cg.plainJavaCapable(true);
    // Uncomment this line to debug the generated code.
    // cg.saveCodeForDebugging(true);
    final ClassGenerator<HashJoinProbe> g = cg.getRoot();

    // Generate the code to project build side records
    g.setMappingSet(projectBuildMapping);

    int fieldId = 0;
    final JExpression buildIndex = JExpr.direct("buildIndex");
    final JExpression outIndex = JExpr.direct("outIndex");
    g.rotateBlock();

    if (rightSchema != null) {
      for (final MaterializedField field : rightSchema) {
        final MajorType inputType = field.getType();
        final MajorType outputType;
        // If left or full outer join, then the output type must be nullable. However, map types are
        // not nullable so we must exclude them from the check below (see DRILL-2197).
        if ((joinType == JoinRelType.LEFT || joinType == JoinRelType.FULL)
            && inputType.getMode() == DataMode.REQUIRED
            && inputType.getMinorType() != TypeProtos.MinorType.MAP) {
          outputType = Types.overrideMode(inputType, DataMode.OPTIONAL);
        } else {
          outputType = inputType;
        }

        // make sure to project field with children for children to show up in the schema
        final MaterializedField projected = field.withType(outputType);
        // Add the vector to our output container
        container.addOrGet(projected);

        final JVar inVV = g.declareVectorValueSetupAndMember("buildBatch",
            new TypedFieldId(field.getType(), true, fieldId));
        final JVar outVV = g.declareVectorValueSetupAndMember("outgoing",
            new TypedFieldId(outputType, false, fieldId));
        g.getEvalBlock().add(outVV.invoke("copyFromSafe")
            .arg(buildIndex.band(JExpr.lit((int) Character.MAX_VALUE)))
            .arg(outIndex)
            .arg(inVV.component(buildIndex.shrz(JExpr.lit(16)))));
        g.rotateBlock();
        fieldId++;
      }
    }
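    /* Roughly, the statement generated above reads as the following plain
     * Java ("outgoing" and "buildBatch" are the mapping names declared at the
     * top of this class; the array index selects a batch in the hyper
     * container):
     *   outgoing.copyFromSafe(buildIndex & 0xFFFF,             // record within the batch
     *                         outIndex,
     *                         buildBatch[buildIndex >>> 16]);  // which build batch
     */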
    // Generate the code to project probe side records
    g.setMappingSet(projectProbeMapping);

    int outputFieldId = fieldId;
    fieldId = 0;
    final JExpression probeIndex = JExpr.direct("probeIndex");

    if (leftUpstream == IterOutcome.OK || leftUpstream == IterOutcome.OK_NEW_SCHEMA) {
      for (final VectorWrapper<?> vv : left) {
        final MajorType inputType = vv.getField().getType();
        final MajorType outputType;
        // If right or full outer join, then the output type should be optional. However, map types are
        // not nullable so we must exclude them from the check below (see DRILL-2771, DRILL-2197).
        if ((joinType == JoinRelType.RIGHT || joinType == JoinRelType.FULL)
            && inputType.getMode() == DataMode.REQUIRED
            && inputType.getMinorType() != TypeProtos.MinorType.MAP) {
          outputType = Types.overrideMode(inputType, DataMode.OPTIONAL);
        } else {
          outputType = inputType;
        }

        final ValueVector v = container.addOrGet(MaterializedField.create(vv.getField().getPath(), outputType));
        if (v instanceof AbstractContainerVector) {
          vv.getValueVector().makeTransferPair(v);
          v.clear();
        }

        final JVar inVV = g.declareVectorValueSetupAndMember("probeBatch",
            new TypedFieldId(inputType, false, fieldId));
        final JVar outVV = g.declareVectorValueSetupAndMember("outgoing",
            new TypedFieldId(outputType, false, outputFieldId));

        g.getEvalBlock().add(outVV.invoke("copyFromSafe").arg(probeIndex).arg(outIndex).arg(inVV));
        g.rotateBlock();
        fieldId++;
        outputFieldId++;
      }
    }

    final HashJoinProbe hj = context.getImplementationClass(cg);
    return hj;
  }

  private void allocateVectors() {
    for (final VectorWrapper<?> v : container) {
      v.getValueVector().allocateNew();
    }
  }

  public HashJoinBatch(HashJoinPOP popConfig, FragmentContext context,
      RecordBatch left, RecordBatch right) throws OutOfMemoryException {
    super(popConfig, context, true);
    this.left = left;
    this.right = right;
    joinType = popConfig.getJoinType();
    conditions = popConfig.getConditions();

    comparators = Lists.newArrayListWithExpectedSize(conditions.size());
    for (int i = 0; i < conditions.size(); i++) {
      JoinCondition cond = conditions.get(i);
      comparators.add(JoinUtils.checkAndReturnSupportedJoinComparator(cond));
    }
  }

  private void updateStats(HashTable htable) {
    if (htable == null) {
      return;
    }
    htable.getStats(htStats);
    stats.setLongStat(Metric.NUM_BUCKETS, htStats.numBuckets);
    stats.setLongStat(Metric.NUM_ENTRIES, htStats.numEntries);
    stats.setLongStat(Metric.NUM_RESIZING, htStats.numResizing);
    stats.setLongStat(Metric.RESIZING_TIME, htStats.resizingTime);
  }

  @Override
  public void killIncoming(boolean sendUpstream) {
    left.kill(sendUpstream);
    right.kill(sendUpstream);
  }

  @Override
  public void close() {
    if (hjHelper != null) {
      hjHelper.clear();
    }

    // If we didn't receive any data, hyperContainer may be null; check before clearing
    if (hyperContainer != null) {
      hyperContainer.clear();
    }
    if (hashTable != null) {
      hashTable.clear();
    }
    super.close();
  }
}