/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector.mapjoin;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.ArrayUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.HashTableLoaderFactory;
import org.apache.hadoop.hive.ql.exec.HashTableLoader;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorColumnMapping;
import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
import org.apache.hadoop.hive.ql.exec.vector.VectorCopyRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedCreateHashTable;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinTableContainer;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastHashTableLoader;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

import com.google.common.base.Preconditions;
/**
 * This class is the common operator class for native vectorized map joins.
 *
 * It contains the common initialization logic.
 *
 * It is used by both inner and outer joins.
 */
public abstract class VectorMapJoinCommonOperator extends MapJoinOperator
    implements VectorizationContextRegion {
  private static final long serialVersionUID = 1L;

  //------------------------------------------------------------------------------------------------

  private static final String CLASS_NAME = VectorMapJoinCommonOperator.class.getName();
  private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);

  protected abstract String getLoggingPrefix();

  // For debug tracing: information about the map or reduce task, operator, operator class, etc.
  protected transient String loggingPrefix;

  protected String getLoggingPrefix(String className) {
    if (loggingPrefix == null) {
      initLoggingPrefix(className);
    }
    return loggingPrefix;
  }

  protected void initLoggingPrefix(String className) {
    loggingPrefix = className;
  }

  //------------------------------------------------------------------------------------------------

  protected VectorMapJoinDesc vectorDesc;

  protected VectorMapJoinInfo vectorMapJoinInfo;

  // Whether this operator is an outer join.
  protected boolean isOuterJoin;

  // Position of the *single* native vector map join small table.
  protected byte posSingleVectorMapJoinSmallTable;

  // The incoming vectorization context.  It describes the input big table vectorized row batch.
  protected VectorizationContext vContext;

  // This is the vectorized row batch description of the output of the native vectorized map join
  // operator.  It is based on the incoming vectorization context.  Its projection may include
  // a mixture of input big table columns and new scratch columns.
  protected VectorizationContext vOutContext;

  // The output column projection of the vectorized row batch.  And, the type infos of the output
  // columns.
  protected int[] outputProjection;
  protected TypeInfo[] outputTypeInfos;

  // These are the vectorized batch expressions for filtering, key expressions, and value
  // expressions.
  protected VectorExpression[] bigTableFilterExpressions;
  protected VectorExpression[] bigTableKeyExpressions;
  protected VectorExpression[] bigTableValueExpressions;

  // This is a map of which vectorized row batch columns are the big table key columns.  Since
  // we may have key expressions that produce new scratch columns, we need a mapping.
  // And, we have their type infos.
  protected int[] bigTableKeyColumnMap;
  protected String[] bigTableKeyColumnNames;
  protected TypeInfo[] bigTableKeyTypeInfos;

  // Similarly, this is a map of which vectorized row batch columns are the big table value
  // columns.  Since we may have value expressions that produce new scratch columns, we need
  // a mapping.  And, we have their type infos.
  protected int[] bigTableValueColumnMap;
  protected String[] bigTableValueColumnNames;
  protected TypeInfo[] bigTableValueTypeInfos;

  // This is a mapping of which big table columns (input and key/value expressions) will be
  // part of the big table portion of the join output result.
  protected VectorColumnOutputMapping bigTableRetainedMapping;

  // This is a mapping of which keys will be copied from the big table (input and key expressions)
  // to the small table result portion of the output for outer join.
  protected VectorColumnOutputMapping bigTableOuterKeyMapping;

  // This is a mapping of the values in the small table hash table that will be copied to the
  // small table result portion of the output.  That is, a mapping of the LazyBinary field order
  // to output batch scratch columns for the small table portion.
  protected VectorColumnSourceMapping smallTableMapping;

  protected VectorColumnSourceMapping projectionMapping;
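  // Illustrative example (hypothetical column numbers): for a big table batch with input
  // columns {0: a, 1: b} where the join key is an expression such as upper(b) evaluated into
  // scratch column 2, bigTableKeyColumnMap would be {2} while bigTableValueColumnMap might be
  // {0}.  The projection mapping can then mix input and scratch columns, e.g.
  // outputProjection = {0, 2, 3} with column 3 holding a deserialized small table value.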
  // These are the output columns for the small table and the outer small table keys.
  protected int[] smallTableOutputVectorColumns;
  protected int[] bigTableOuterKeyOutputVectorColumns;

  // These are the columns in the big and small table that are BytesColumnVector columns.
  // We create data buffers for these columns so we can copy strings into those columns by value.
  protected int[] bigTableByteColumnVectorColumns;
  protected int[] smallTableByteColumnVectorColumns;

  // The above members are initialized by the constructor and must not be
  // transient.
  //---------------------------------------------------------------------------

  // The threshold where we should use a repeating vectorized row batch optimization for
  // generating join output results.
  protected transient boolean useOverflowRepeatedThreshold;
  protected transient int overflowRepeatedThreshold;

  // A helper object that efficiently copies the big table columns that are for the big table
  // portion of the join output.
  protected transient VectorCopyRow bigTableRetainedVectorCopy;

  // A helper object that efficiently copies the big table key columns (input or key expressions)
  // that appear in the small table portion of the join output for outer joins.
  protected transient VectorCopyRow bigTableVectorCopyOuterKeys;

  // This helper object deserializes LazyBinary format small table values into columns of a row
  // in a vectorized row batch.
  protected transient VectorDeserializeRow<LazyBinaryDeserializeRead> smallTableVectorDeserializeRow;

  // This is a second batch with the same "column schema" as the big table batch that can be
  // used to build join output results in.  If we can create some join output results in the
  // big table batch, we will for better efficiency (i.e. avoiding copying).  Otherwise, we
  // will use the overflow batch.
  protected transient VectorizedRowBatch overflowBatch;

  // A scratch batch that will be used to play back big table rows that were spilled
  // to disk for the Hybrid Grace hash partitioning.
  protected transient VectorizedRowBatch spillReplayBatch;

  // Whether the native vectorized map join operator has performed its common setup.
  protected transient boolean needCommonSetup;

  // Whether the native vectorized map join operator has performed its
  // native vector map join hash table setup.
  protected transient boolean needHashTableSetup;

  // The small table hash table for the native vectorized map join operator.
  protected transient VectorMapJoinHashTable vectorMapJoinHashTable;

  /** Kryo ctor. */
  protected VectorMapJoinCommonOperator() {
    super();
  }

  public VectorMapJoinCommonOperator(CompilationOpContext ctx) {
    super(ctx);
  }

  public VectorMapJoinCommonOperator(CompilationOpContext ctx,
      VectorizationContext vContext, OperatorDesc conf) throws HiveException {
    super(ctx);

    MapJoinDesc desc = (MapJoinDesc) conf;
    this.conf = desc;
    vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc();
    vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();
    Preconditions.checkState(vectorMapJoinInfo != null);

    this.vContext = vContext;

    /*
     * Create a new vectorization context to create a new projection.  The output column
     * manager must be inherited to track the scratch columns.
     */
    vOutContext = new VectorizationContext(getName(), this.vContext);

    order = desc.getTagOrder();
    posBigTable = (byte) desc.getPosBigTable();
    posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
    isOuterJoin = !desc.getNoOuterJoin();

    Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
    bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable),
        VectorExpressionDescriptor.Mode.FILTER);

    bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap();
    bigTableKeyColumnNames = vectorMapJoinInfo.getBigTableKeyColumnNames();
    bigTableKeyTypeInfos = vectorMapJoinInfo.getBigTableKeyTypeInfos();
    bigTableKeyExpressions = vectorMapJoinInfo.getBigTableKeyExpressions();

    bigTableValueColumnMap = vectorMapJoinInfo.getBigTableValueColumnMap();
    bigTableValueColumnNames = vectorMapJoinInfo.getBigTableValueColumnNames();
    bigTableValueTypeInfos = vectorMapJoinInfo.getBigTableValueTypeInfos();
    bigTableValueExpressions = vectorMapJoinInfo.getBigTableValueExpressions();

    bigTableRetainedMapping = vectorMapJoinInfo.getBigTableRetainedMapping();
    bigTableOuterKeyMapping = vectorMapJoinInfo.getBigTableOuterKeyMapping();
    smallTableMapping = vectorMapJoinInfo.getSmallTableMapping();
    projectionMapping = vectorMapJoinInfo.getProjectionMapping();

    determineCommonInfo(isOuterJoin);
  }
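  // Worked example (hypothetical values) for the small table position logic above: with
  // desc.getTagOrder() = {0, 1} and posBigTable = 0, posSingleVectorMapJoinSmallTable
  // resolves to order[1] = 1; with desc.getTagOrder() = {1, 0} and posBigTable = 0, it
  // resolves to order[0] = 1.  Either way, the single non-big-table entry is chosen.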
  protected void determineCommonInfo(boolean isOuter) throws HiveException {

    bigTableOuterKeyOutputVectorColumns = bigTableOuterKeyMapping.getOutputColumns();
    smallTableOutputVectorColumns = smallTableMapping.getOutputColumns();

    // Which big table and small table columns are BytesColumnVector columns and need to have
    // their data buffers manually reset for some join result processing?
    bigTableByteColumnVectorColumns = getByteColumnVectorColumns(bigTableOuterKeyMapping);
    smallTableByteColumnVectorColumns = getByteColumnVectorColumns(smallTableMapping);

    outputProjection = projectionMapping.getOutputColumns();
    outputTypeInfos = projectionMapping.getTypeInfos();

    if (isLogDebugEnabled) {
      int[] orderDisplayable = new int[order.length];
      for (int i = 0; i < order.length; i++) {
        orderDisplayable[i] = (int) order[i];
      }
      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor order " +
          Arrays.toString(orderDisplayable));
      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posBigTable " +
          (int) posBigTable);
      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " +
          (int) posSingleVectorMapJoinSmallTable);

      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " +
          Arrays.toString(bigTableKeyColumnMap));
      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnNames " +
          Arrays.toString(bigTableKeyColumnNames));
      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeInfos " +
          Arrays.toString(bigTableKeyTypeInfos));

      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " +
          Arrays.toString(bigTableValueColumnMap));
      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnNames " +
          Arrays.toString(bigTableValueColumnNames));
      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueTypeInfos " +
          Arrays.toString(bigTableValueTypeInfos));

      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " +
          bigTableRetainedMapping.toString());

      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableOuterKeyMapping " +
          bigTableOuterKeyMapping.toString());

      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableMapping " +
          smallTableMapping.toString());

      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " +
          Arrays.toString(bigTableByteColumnVectorColumns));
      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " +
          Arrays.toString(smallTableByteColumnVectorColumns));

      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputProjection " +
          Arrays.toString(outputProjection));
      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputTypeInfos " +
          Arrays.toString(outputTypeInfos));
    }

    setupVOutContext(conf.getOutputColumnNames());
  }

  /**
   * Determine from a mapping which columns are BytesColumnVector columns.
   */
  private int[] getByteColumnVectorColumns(VectorColumnMapping mapping) {
    // Search the mapping for any string family columns and return their output columns.
    ArrayList<Integer> list = new ArrayList<Integer>();
    int count = mapping.getCount();
    int[] outputColumns = mapping.getOutputColumns();
    TypeInfo[] typeInfos = mapping.getTypeInfos();
    for (int i = 0; i < count; i++) {
      int outputColumn = outputColumns[i];
      String typeName = typeInfos[i].getTypeName();
      if (VectorizationContext.isStringFamily(typeName)) {
        list.add(outputColumn);
      }
    }
    return ArrayUtils.toPrimitive(list.toArray(new Integer[0]));
  }
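  // For example (hypothetical mapping): type infos {int, string, varchar(10)} with output
  // columns {4, 5, 6} yield {5, 6} from getByteColumnVectorColumns above, since string and
  // varchar are in the string family and are represented by BytesColumnVector at runtime.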
  /**
   * Setup the vectorized row batch description of the output of the native vectorized map join
   * operator.  Use the output projection we previously built from a mixture of input big table
   * columns and new scratch columns.
   */
  protected void setupVOutContext(List<String> outputColumnNames) {
    if (isLogDebugEnabled) {
      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputColumnNames " +
          outputColumnNames);
    }
    if (outputColumnNames.size() != outputProjection.length) {
      throw new RuntimeException("Output column names " + outputColumnNames +
          " length and output projection " + Arrays.toString(outputProjection) +
          " / " + Arrays.toString(outputTypeInfos) + " length mismatch");
    }
    vOutContext.resetProjectionColumns();
    for (int i = 0; i < outputColumnNames.size(); ++i) {
      String columnName = outputColumnNames.get(i);
      int outputColumn = outputProjection[i];
      vOutContext.addProjectionColumn(columnName, outputColumn);

      if (isLogDebugEnabled) {
        LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor addProjectionColumn " +
            i + " columnName " + columnName + " outputColumn " + outputColumn);
      }
    }
  }

  /**
   * This override lets us substitute our own fast vectorized hash table loader.
   */
  @Override
  protected HashTableLoader getHashTableLoader(Configuration hconf) {
    VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) conf.getVectorDesc();
    HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType();
    HashTableLoader hashTableLoader;
    switch (vectorDesc.hashTableImplementationType()) {
    case OPTIMIZED:
      // Use the Tez hash table loader.
      hashTableLoader = HashTableLoaderFactory.getLoader(hconf);
      break;
    case FAST:
      // Use our specialized hash table loader.
      hashTableLoader = HiveConf.getVar(
          hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark") ?
          HashTableLoaderFactory.getLoader(hconf) : new VectorMapJoinFastHashTableLoader();
      break;
    default:
      throw new RuntimeException("Unknown vector map join hash table implementation type " +
          hashTableImplementationType.name());
    }
    return hashTableLoader;
  }
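  // Note on the configuration read in initializeOp below: a negative threshold disables the
  // repeating batch optimization, since useOverflowRepeatedThreshold is derived as
  // (overflowRepeatedThreshold >= 0); e.g., a configured value of 0 or more turns it on.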
  @Override
  protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);

    /*
     * Get configuration parameters.
     */
    overflowRepeatedThreshold = HiveConf.getIntVar(hconf,
        HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD);
    useOverflowRepeatedThreshold = (overflowRepeatedThreshold >= 0);

    /*
     * Create our vectorized copy row and deserialize row helper objects.
     */
    if (smallTableMapping.getCount() > 0) {
      smallTableVectorDeserializeRow =
          new VectorDeserializeRow<LazyBinaryDeserializeRead>(
              new LazyBinaryDeserializeRead(
                  smallTableMapping.getTypeInfos(),
                  /* useExternalBuffer */ true));
      smallTableVectorDeserializeRow.init(smallTableMapping.getOutputColumns());
    }

    if (bigTableRetainedMapping.getCount() > 0) {
      bigTableRetainedVectorCopy = new VectorCopyRow();
      bigTableRetainedVectorCopy.init(bigTableRetainedMapping);
    }

    if (bigTableOuterKeyMapping.getCount() > 0) {
      bigTableVectorCopyOuterKeys = new VectorCopyRow();
      bigTableVectorCopyOuterKeys.init(bigTableOuterKeyMapping);
    }

    /*
     * Setup the overflow batch.
     */
    overflowBatch = setupOverflowBatch();

    needCommonSetup = true;
    needHashTableSetup = true;

    if (isLogDebugEnabled) {
      int[] currentScratchColumns = vOutContext.currentScratchColumns();
      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp currentScratchColumns " +
          Arrays.toString(currentScratchColumns));

      StructObjectInspector structOutputObjectInspector =
          (StructObjectInspector) outputObjInspector;
      List<? extends StructField> fields = structOutputObjectInspector.getAllStructFieldRefs();
      int i = 0;
      for (StructField field : fields) {
        LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp " + i +
            " field " + field.getFieldName() + " type " +
            field.getFieldObjectInspector().getTypeName());
        i++;
      }
    }
  }

  @Override
  protected void completeInitializationOp(Object[] os) throws HiveException {
    // setup mapJoinTables and serdes
    super.completeInitializationOp(os);

    VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) conf.getVectorDesc();
    HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType();
    switch (vectorDesc.hashTableImplementationType()) {
    case OPTIMIZED:
      {
        // Create our vector map join optimized hash table variation *above* the
        // map join table container.
        vectorMapJoinHashTable = VectorMapJoinOptimizedCreateHashTable.createHashTable(conf,
            mapJoinTables[posSingleVectorMapJoinSmallTable]);
      }
      break;
    case FAST:
      {
        // Get our vector map join fast hash table variation from the
        // vector map join table container.
        VectorMapJoinTableContainer vectorMapJoinTableContainer =
            (VectorMapJoinTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable];
        vectorMapJoinHashTable = vectorMapJoinTableContainer.vectorMapJoinHashTable();
      }
      break;
    default:
      throw new RuntimeException("Unknown vector map join hash table implementation type " +
          hashTableImplementationType.name());
    }
    LOG.info("Using " + vectorMapJoinHashTable.getClass().getSimpleName() + " from " +
        this.getClass().getSimpleName());
  }
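  // Illustrative layout (hypothetical numbers) for setupOverflowBatch below: if the big table
  // batch has 4 columns (firstOutputColumnIndex() == 4) and vOutContext declares 2 scratch
  // column types, the overflow batch is created with 6 columns.  With outputProjection =
  // {1, 4, 5}, the projection loop allocates columns 1, 4, and 5; the scratch loop then
  // visits columns 4 and 5 and skips them because they are already allocated.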
  /*
   * Setup our second batch with the same "column schema" as the big table batch that can be
   * used to build join output results in.
   */
  protected VectorizedRowBatch setupOverflowBatch() throws HiveException {
    int initialColumnCount = vContext.firstOutputColumnIndex();
    VectorizedRowBatch overflowBatch;

    int totalNumColumns = initialColumnCount + vOutContext.getScratchColumnTypeNames().length;
    overflowBatch = new VectorizedRowBatch(totalNumColumns);

    // First, allocate just the projection columns we will be using.
    for (int i = 0; i < outputProjection.length; i++) {
      int outputColumn = outputProjection[i];
      String typeName = outputTypeInfos[i].getTypeName();
      allocateOverflowBatchColumnVector(overflowBatch, outputColumn, typeName);
    }

    // Now, add any scratch columns needed for children operators.
    int outputColumn = initialColumnCount;
    for (String typeName : vOutContext.getScratchColumnTypeNames()) {
      allocateOverflowBatchColumnVector(overflowBatch, outputColumn++, typeName);
    }

    overflowBatch.projectedColumns = outputProjection;
    overflowBatch.projectionSize = outputProjection.length;

    overflowBatch.reset();

    return overflowBatch;
  }

  /*
   * Allocate overflow batch columns by hand.
   */
  private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, int outputColumn,
      String typeName) throws HiveException {
    if (overflowBatch.cols[outputColumn] == null) {
      typeName = VectorizationContext.mapTypeNameSynonyms(typeName);

      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);

      overflowBatch.cols[outputColumn] = VectorizedBatchUtil.createColumnVector(typeInfo);

      if (isLogDebugEnabled) {
        LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp overflowBatch outputColumn " +
            outputColumn + " class " + overflowBatch.cols[outputColumn].getClass().getSimpleName());
      }
    }
  }

  /*
   * Common one time setup by the native vectorized map join operator's processOp.
   */
  protected void commonSetup(VectorizedRowBatch batch) throws HiveException {

    if (isLogDebugEnabled) {
      LOG.debug("VectorMapJoinCommonOperator commonSetup begin...");
      displayBatchColumns(batch, "batch");
      displayBatchColumns(overflowBatch, "overflowBatch");
    }

    // Make sure big table BytesColumnVectors have room for string values in the overflow batch...
    for (int column : bigTableByteColumnVectorColumns) {
      BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column];
      bytesColumnVector.initBuffer();
    }

    // Make sure small table BytesColumnVectors have room for string values in the big table and
    // overflow batches...
    for (int column : smallTableByteColumnVectorColumns) {
      BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[column];
      bytesColumnVector.initBuffer();
      bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column];
      bytesColumnVector.initBuffer();
    }

    // Setup a scratch batch that will be used to play back big table rows that were spilled
    // to disk for the Hybrid Grace hash partitioning.
    spillReplayBatch = VectorizedBatchUtil.makeLike(batch);
  }

  protected void displayBatchColumns(VectorizedRowBatch batch, String batchName) {
    LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator commonSetup " + batchName +
        " column count " + batch.numCols);
    for (int column = 0; column < batch.numCols; column++) {
      LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator commonSetup " + batchName +
          " column " + column + " type " +
          (batch.cols[column] == null ? "NULL" : batch.cols[column].getClass().getSimpleName()));
    }
  }

  @Override
  public OperatorType getType() {
    return OperatorType.MAPJOIN;
  }

  @Override
  public VectorizationContext getOuputVectorizationContext() {
    return vOutContext;
  }
}