/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.impl.join;

import java.io.IOException;
import java.util.LinkedList;
import java.util.Map;

import com.google.common.collect.ImmutableMap;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.expression.ErrorCollector;
import org.apache.drill.common.expression.ErrorCollectorImpl;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.compile.sig.GeneratorMapping;
import org.apache.drill.exec.compile.sig.MappingSet;
import org.apache.drill.exec.exception.ClassTransformationException;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.expr.ClassGenerator;
import org.apache.drill.exec.expr.CodeGenerator;
import org.apache.drill.exec.expr.BatchReference;
import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.physical.config.NestedLoopJoinPOP;
import org.apache.drill.exec.physical.impl.filter.ReturnValueExpression;
import org.apache.drill.exec.physical.impl.sort.RecordBatchData;
import org.apache.drill.exec.record.AbstractRecordBatch;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.ExpandableHyperContainer;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.TypedFieldId;
import org.apache.drill.exec.record.VectorAccessible;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.AllocationHelper;

import com.google.common.base.Preconditions;
import com.sun.codemodel.JExpr;
import com.sun.codemodel.JExpression;
import com.sun.codemodel.JVar;

import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.complex.AbstractContainerVector;

/*
 * RecordBatch implementation for the nested loop join operator
 */
public class NestedLoopJoinBatch extends AbstractRecordBatch<NestedLoopJoinPOP> {
  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(NestedLoopJoinBatch.class);

  // Maximum number of records in the outgoing batch
  protected static final int MAX_BATCH_SIZE = 4096;

  // Input indexes to correctly update the stats
  protected static final int LEFT_INPUT = 0;
  protected static final int RIGHT_INPUT = 1;

  // Left input to the nested loop join operator
  private final RecordBatch left;

  // Schema on the left side
  private BatchSchema leftSchema = null;

  // State (IterOutcome) of the left input
  private IterOutcome leftUpstream = IterOutcome.NONE;

  // Right input to the nested loop join operator
  private final RecordBatch right;

  // Schema on the right side
  private BatchSchema rightSchema = null;

  // State (IterOutcome) of the right input
  private IterOutcome rightUpstream = IterOutcome.NONE;

  // Runtime generated class implementing the NestedLoopJoin interface
  private NestedLoopJoin nljWorker = null;

  // Number of output records in the current outgoing batch
  private int outputRecords = 0;

  // We accumulate all the batches on the right side in a hyper container.
  private ExpandableHyperContainer rightContainer = new ExpandableHyperContainer();

  // Record count of the individual batches in the right hyper container
  private LinkedList<Integer> rightCounts = new LinkedList<>();
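
  // Note: together, rightContainer and rightCounts describe the accumulated right
  // side. Batch i sits at position i in the hyper container and holds
  // rightCounts.get(i) records, so the generated code can address any right-side
  // record by a (batch index, record index within batch) pair derived from these
  // counts.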

  // Generator mapping for the right side
  private static final GeneratorMapping EMIT_RIGHT =
      GeneratorMapping.create("doSetup" /* setup method */, "emitRight" /* eval method */,
          null /* reset */, null /* cleanup */);

  // Generator mapping for the right side : constant
  private static final GeneratorMapping EMIT_RIGHT_CONSTANT =
      GeneratorMapping.create("doSetup" /* setup method */, "doSetup" /* eval method */,
          null /* reset */, null /* cleanup */);

  // Generator mapping for the left side : scalar
  private static final GeneratorMapping EMIT_LEFT =
      GeneratorMapping.create("doSetup" /* setup method */, "emitLeft" /* eval method */,
          null /* reset */, null /* cleanup */);

  // Generator mapping for the left side : constant
  private static final GeneratorMapping EMIT_LEFT_CONSTANT =
      GeneratorMapping.create("doSetup" /* setup method */, "doSetup" /* eval method */,
          null /* reset */, null /* cleanup */);

  // Mapping set for the right side
  private static final MappingSet emitRightMapping =
      new MappingSet("rightCompositeIndex" /* read index */, "outIndex" /* write index */,
          "rightContainer" /* read container */, "outgoing" /* write container */,
          EMIT_RIGHT_CONSTANT, EMIT_RIGHT);

  // Mapping set for the left side
  private static final MappingSet emitLeftMapping =
      new MappingSet("leftIndex" /* read index */, "outIndex" /* write index */,
          "leftBatch" /* read container */, "outgoing" /* write container */,
          EMIT_LEFT_CONSTANT, EMIT_LEFT);

  protected NestedLoopJoinBatch(NestedLoopJoinPOP popConfig, FragmentContext context,
      RecordBatch left, RecordBatch right) throws OutOfMemoryException {
    super(popConfig, context);
    Preconditions.checkNotNull(left);
    Preconditions.checkNotNull(right);
    this.left = left;
    this.right = right;
  }

  /**
   * Method drains the right side input of the NLJ and accumulates the data
   * in a hyper container. Once we have all the data from the right side we
   * process the left side one batch at a time and produce the output batch.
   * @return IterOutcome state of the nested loop join batch
   */
  @Override
  public IterOutcome innerNext() {
    // Accumulate batches on the right side in a hyper container
    if (state == BatchState.FIRST) {
      // exit if we have an empty left batch
      if (leftUpstream == IterOutcome.NONE) {
        // inform the upstream that we don't need any more data and clean up any batches already in the queue
        killAndDrainRight();
        return IterOutcome.NONE;
      }
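
      // Drain the right side completely before producing any output: each right
      // batch is copied into the hyper container so that the generated worker
      // can replay the entire right side for every left batch.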
      boolean drainRight = true;
      while (drainRight) {
        rightUpstream = next(RIGHT_INPUT, right);
        switch (rightUpstream) {
          case OK_NEW_SCHEMA:
            if (!right.getSchema().equals(rightSchema)) {
              throw new DrillRuntimeException("Nested loop join does not handle schema change. Schema change" +
                  " found on the right side of NLJ.");
            }
            // fall through
          case OK:
            addBatchToHyperContainer(right);
            break;
          case OUT_OF_MEMORY:
            return IterOutcome.OUT_OF_MEMORY;
          case NONE:
          case STOP:
            // TODO we got a STOP, shouldn't we stop immediately?
          case NOT_YET:
            drainRight = false;
            break;
        }
      }
      nljWorker.setupNestedLoopJoin(context, left, rightContainer, rightCounts, this);
      state = BatchState.NOT_FIRST;
    }

    // allocate space for the outgoing batch
    allocateVectors();

    // invoke the runtime generated method to emit records in the output batch
    outputRecords = nljWorker.outputRecords(popConfig.getJoinType());

    // Set the record count for each vector in the output container
    for (final VectorWrapper<?> vw : container) {
      vw.getValueVector().getMutator().setValueCount(outputRecords);
    }

    // Set the record count in the container
    container.setRecordCount(outputRecords);
    container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    logger.debug("Number of records emitted: " + outputRecords);

    return (outputRecords > 0) ? IterOutcome.OK : IterOutcome.NONE;
  }

  private void killAndDrainRight() {
    if (!hasMore(rightUpstream)) {
      return;
    }
    right.kill(true);
    while (hasMore(rightUpstream)) {
      for (final VectorWrapper<?> wrapper : right) {
        wrapper.getValueVector().clear();
      }
      rightUpstream = next(HashJoinHelper.RIGHT_INPUT, right);
    }
  }

  private boolean hasMore(IterOutcome outcome) {
    return outcome == IterOutcome.OK || outcome == IterOutcome.OK_NEW_SCHEMA;
  }

  /**
   * Method generates the runtime code needed for NLJ. Other than the setup method to set the input and output value
   * vector references we implement three more methods:
   * 1. doEval() -> Evaluates if a record from the left side matches a record from the right side
   * 2. emitLeft() -> Projects a record from the left side
   * 3. emitRight() -> Projects a record from the right side (which is a hyper container)
   * @return the runtime generated class that implements the NestedLoopJoin interface
   */
  private NestedLoopJoin setupWorker() throws IOException, ClassTransformationException, SchemaChangeException {
    final CodeGenerator<NestedLoopJoin> nLJCodeGenerator = CodeGenerator.get(
        NestedLoopJoin.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions());
    nLJCodeGenerator.plainJavaCapable(true);
    // Uncomment this line to debug the generated code.
    // nLJCodeGenerator.saveCodeForDebugging(true);
    final ClassGenerator<NestedLoopJoin> nLJClassGenerator = nLJCodeGenerator.getRoot();

    // generate doEval
    final ErrorCollector collector = new ErrorCollectorImpl();

    /*
     * A logical expression may contain fields from both the left and the right batches. During code generation
     * (materialization) we need to indicate from which input each field should be taken. Non-equality joins
     * fall into one of the categories below. For example:
     * 1. Join on non-equality join predicates:
     *    select * from t1 inner join t2 on (t1.c1 between t2.c1 AND t2.c2) AND (...)
     * 2. Join with an OR predicate:
     *    select * from t1 inner join t2 on t1.c1 = t2.c1 OR t1.c2 = t2.c2
     */
    Map<VectorAccessible, BatchReference> batches = ImmutableMap
        .<VectorAccessible, BatchReference>builder()
        .put(left, new BatchReference("leftBatch", "leftIndex"))
        .put(rightContainer, new BatchReference("rightContainer", "rightBatchIndex", "rightRecordIndexWithinBatch"))
        .build();

    LogicalExpression materialize = ExpressionTreeMaterializer.materialize(
        popConfig.getCondition(), batches, collector, context.getFunctionRegistry(), false, false);

    if (collector.hasErrors()) {
      throw new SchemaChangeException(String.format("Failure while trying to materialize join condition. Errors:\n %s.",
          collector.toErrorString()));
    }
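
    // The materialized join condition becomes the body of the generated doEval():
    // wrapping it in a ReturnValueExpression makes the generated method return the
    // boolean result of evaluating the condition for the current pair of left and
    // right records.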
    nLJClassGenerator.addExpr(new ReturnValueExpression(materialize), ClassGenerator.BlkCreateMode.FALSE);

    // generate emitLeft
    nLJClassGenerator.setMappingSet(emitLeftMapping);
    JExpression outIndex = JExpr.direct("outIndex");
    JExpression leftIndex = JExpr.direct("leftIndex");

    int fieldId = 0;
    int outputFieldId = 0;
    // Set the input and output value vector references corresponding to the left batch
    for (MaterializedField field : leftSchema) {
      final TypeProtos.MajorType fieldType = field.getType();

      // Add the vector to the output container
      container.addOrGet(field);

      JVar inVV = nLJClassGenerator.declareVectorValueSetupAndMember("leftBatch",
          new TypedFieldId(fieldType, false, fieldId));
      JVar outVV = nLJClassGenerator.declareVectorValueSetupAndMember("outgoing",
          new TypedFieldId(fieldType, false, outputFieldId));

      nLJClassGenerator.getEvalBlock().add(outVV.invoke("copyFromSafe").arg(leftIndex).arg(outIndex).arg(inVV));
      nLJClassGenerator.rotateBlock();
      fieldId++;
      outputFieldId++;
    }

    // generate emitRight
    fieldId = 0;
    nLJClassGenerator.setMappingSet(emitRightMapping);
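
    // The right side is a hyper container, so a record is addressed by two indexes:
    // which accumulated batch it lives in (batchIndex) and its position within that
    // batch (recordIndexWithinBatch). The generated copy is roughly (a sketch, not
    // the verbatim generated code):
    //
    //   outVV.copyFromSafe(recordIndexWithinBatch, outIndex, inVV[batchIndex]);
    //
    // where inVV.component(batchIndex) below selects the value vector of one
    // accumulated batch.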
    JExpression batchIndex = JExpr.direct("batchIndex");
    JExpression recordIndexWithinBatch = JExpr.direct("recordIndexWithinBatch");

    // Set the input and output value vector references corresponding to the right batch
    for (MaterializedField field : rightSchema) {
      final TypeProtos.MajorType inputType = field.getType();
      TypeProtos.MajorType outputType;
      // if the join type is LEFT, the output fields from the right batch must have OPTIONAL data mode
      if (popConfig.getJoinType() == JoinRelType.LEFT && inputType.getMode() == TypeProtos.DataMode.REQUIRED) {
        outputType = Types.overrideMode(inputType, TypeProtos.DataMode.OPTIONAL);
      } else {
        outputType = inputType;
      }

      MaterializedField newField = MaterializedField.create(field.getPath(), outputType);
      container.addOrGet(newField);

      JVar inVV = nLJClassGenerator.declareVectorValueSetupAndMember("rightContainer",
          new TypedFieldId(inputType, true, fieldId));
      JVar outVV = nLJClassGenerator.declareVectorValueSetupAndMember("outgoing",
          new TypedFieldId(outputType, false, outputFieldId));
      nLJClassGenerator.getEvalBlock().add(outVV.invoke("copyFromSafe")
          .arg(recordIndexWithinBatch)
          .arg(outIndex)
          .arg(inVV.component(batchIndex)));
      nLJClassGenerator.rotateBlock();
      fieldId++;
      outputFieldId++;
    }

    return context.getImplementationClass(nLJCodeGenerator);
  }

  /**
   * Simple method to allocate space for all the vectors in the container.
   */
  private void allocateVectors() {
    for (final VectorWrapper<?> vw : container) {
      AllocationHelper.allocateNew(vw.getValueVector(), MAX_BATCH_SIZE);
    }
  }

  /**
   * Builds the output container's schema. Goes over the left and the right
   * batch and adds the corresponding vectors to the output container.
   * @throws SchemaChangeException if the batch schema was changed during execution
   */
  @Override
  protected void buildSchema() throws SchemaChangeException {
    try {
      leftUpstream = next(LEFT_INPUT, left);
      rightUpstream = next(RIGHT_INPUT, right);

      if (leftUpstream == IterOutcome.STOP || rightUpstream == IterOutcome.STOP) {
        state = BatchState.STOP;
        return;
      }

      if (leftUpstream == IterOutcome.OUT_OF_MEMORY || rightUpstream == IterOutcome.OUT_OF_MEMORY) {
        state = BatchState.OUT_OF_MEMORY;
        return;
      }

      if (leftUpstream != IterOutcome.NONE) {
        leftSchema = left.getSchema();
        for (final VectorWrapper<?> vw : left) {
          container.addOrGet(vw.getField());
        }
      }

      if (rightUpstream != IterOutcome.NONE) {
        // make the right input schema optional if we have a LEFT join
        for (final VectorWrapper<?> vectorWrapper : right) {
          TypeProtos.MajorType inputType = vectorWrapper.getField().getType();
          TypeProtos.MajorType outputType;
          if (popConfig.getJoinType() == JoinRelType.LEFT && inputType.getMode() == TypeProtos.DataMode.REQUIRED) {
            outputType = Types.overrideMode(inputType, TypeProtos.DataMode.OPTIONAL);
          } else {
            outputType = inputType;
          }
          MaterializedField newField = MaterializedField.create(vectorWrapper.getField().getPath(), outputType);
          ValueVector valueVector = container.addOrGet(newField);
          if (valueVector instanceof AbstractContainerVector) {
            vectorWrapper.getValueVector().makeTransferPair(valueVector);
            valueVector.clear();
          }
        }
        rightSchema = right.getSchema();
        addBatchToHyperContainer(right);
      }

      allocateVectors();
      nljWorker = setupWorker();

      // if the left batch is empty, fetch the next one
      if (leftUpstream != IterOutcome.NONE && left.getRecordCount() == 0) {
        leftUpstream = next(LEFT_INPUT, left);
      }

      container.setRecordCount(0);
      container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    } catch (ClassTransformationException | IOException e) {
      throw new SchemaChangeException(e);
    }
  }

  private void addBatchToHyperContainer(RecordBatch inputBatch) {
    final RecordBatchData batchCopy = new RecordBatchData(inputBatch, oContext.getAllocator());
    boolean success = false;
    try {
      rightCounts.addLast(inputBatch.getRecordCount());
      rightContainer.addBatch(batchCopy.getContainer());
      success = true;
    } finally {
      if (!success) {
        batchCopy.clear();
      }
    }
  }

  @Override
  public void close() {
    rightContainer.clear();
    rightCounts.clear();
    super.close();
  }

  @Override
  protected void killIncoming(boolean sendUpstream) {
    this.left.kill(sendUpstream);
    this.right.kill(sendUpstream);
  }

  @Override
  public int getRecordCount() {
    return outputRecords;
  }
}