/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec.vector;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.AbstractMapOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc;
import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.Writable;
import com.google.common.base.Preconditions;
/*
*
* The vectorized MapOperator.
*
* There are 3 modes of reading for vectorization:
*
* 1) One for the Vectorized Input File Format which returns VectorizedRowBatch as the row.
*
* 2) One using VectorDeserializeRow to deserialize each row into the VectorizedRowBatch.
* Currently supported for these Input File Formats:
* TEXTFILE
* SEQUENCEFILE
*
* 3) And one using the regular partition deserializer to get the row object and assign
* the row object into the VectorizedRowBatch with VectorAssignRow.
* This picks up Input File Formats not supported by the other two.
*/
public class VectorMapOperator extends AbstractMapOperator {
private static final long serialVersionUID = 1L;
/*
* Overall information on this vectorized Map operation.
*/
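// Maps each input file's nominal path to the reading context for its partition.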
private transient HashMap<String, VectorPartitionContext> fileToPartitionContextMap;
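// The single root operator (a TableScanOperator); vectorization supports only one input tree.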
private transient Operator<? extends OperatorDesc> oneRootOperator;
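// Type information and object inspectors describing the table's row schema.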
private transient TypeInfo tableStructTypeInfo;
private transient StandardStructObjectInspector tableStandardStructObjectInspector;
private transient TypeInfo[] tableRowTypeInfos;
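// Column numbers of the data columns the query actually reads.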
private transient int[] dataColumnNums;
private transient StandardStructObjectInspector neededStandardStructObjectInspector;
// The context for creating the VectorizedRowBatch for this Map node that
// the Vectorizer class determined.
private transient VectorizedRowBatchCtx batchContext;
/*
* A different batch for vectorized Input File Format readers so they can do their work
* overlapped with work of the row collection that vector/row deserialization does. This allows
* the partitions to mix modes (e.g. for us to flush the previously batched rows on file change).
*/
private transient VectorizedRowBatch vectorizedInputFileFormatBatch;
/*
* This batch is only used by vector/row deserializer readers.
*/
private transient VectorizedRowBatch deserializerBatch;
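// Number of batches sent into the operator tree so far.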
private transient long batchCounter;
private transient int dataColumnCount;
private transient int partitionColumnCount;
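// Reusable array holding the current partition's column values.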
private transient Object[] partitionValues;
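// Boolean include array for the data columns, truncated after the highest column number
// actually read (see determineDataColumnsToIncludeTruncated).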
private transient boolean[] dataColumnsToIncludeTruncated;
/*
* The following members have context information for the current partition file being read.
*/
// Current vector map operator read type and context.
private transient VectorMapOperatorReadType currentReadType;
private transient VectorPartitionContext currentVectorPartContext;
// The number of data columns that the current reader will return.
// Only applicable for vector/row deserialization.
private transient int currentDataColumnCount;
// When we are doing vector deserialization, these are the fast deserializer and
// the vector row deserializer.
private transient DeserializeRead currentDeserializeRead;
private transient VectorDeserializeRow currentVectorDeserializeRow;
// When we are doing row deserialization, these are the regular deserializer,
// partition object inspector, and vector row assigner.
private Deserializer currentPartDeserializer;
private StructObjectInspector currentPartRawRowObjectInspector;
private VectorAssignRow currentVectorAssign;
/*
* The abstract context for the 3 kinds of vectorized reading.
*/
protected abstract class VectorPartitionContext {
protected final PartitionDesc partDesc;
String tableName;
String partName;
/*
* Initialization here is adapted from MapOperator.MapOpCtx.initObjectInspector method.
*/
private VectorPartitionContext(PartitionDesc partDesc) {
this.partDesc = partDesc;
TableDesc td = partDesc.getTableDesc();
// Use table properties in case of unpartitioned tables,
// and the union of table properties and partition properties, with partition
// taking precedence, in the case of partitioned tables
Properties overlayedProps =
SerDeUtils.createOverlayedProperties(td.getProperties(), partDesc.getProperties());
Map<String, String> partSpec = partDesc.getPartSpec();
tableName = String.valueOf(overlayedProps.getProperty("name"));
partName = String.valueOf(partSpec);
}
public PartitionDesc getPartDesc() {
return partDesc;
}
/*
* Override this for concrete initialization.
*/
public abstract void init(Configuration hconf)
throws SerDeException, Exception;
/*
* How many data columns is the partition reader actually supplying?
*/
public abstract int getReaderDataColumnCount();
}
/*
* Context for reading a Vectorized Input File Format.
*/
protected class VectorizedInputFileFormatPartitionContext extends VectorPartitionContext {
private VectorizedInputFileFormatPartitionContext(PartitionDesc partDesc) {
super(partDesc);
}
@Override
public void init(Configuration hconf) {
}
@Override
public int getReaderDataColumnCount() {
throw new RuntimeException("Not applicable");
}
}
/*
* Context for using VectorDeserializeRow to deserialize each row from the Input File Format
* into the VectorizedRowBatch.
*/
protected class VectorDeserializePartitionContext extends VectorPartitionContext {
// This helper object deserializes rows from a known deserialization / input file format
// combination into the columns of a row in a vectorized row batch.
private VectorDeserializeRow vectorDeserializeRow;
private DeserializeRead deserializeRead;
private int readerColumnCount;
private VectorDeserializePartitionContext(PartitionDesc partDesc) {
super(partDesc);
}
public VectorDeserializeRow getVectorDeserializeRow() {
return vectorDeserializeRow;
}
DeserializeRead getDeserializeRead() {
return deserializeRead;
}
@Override
public int getReaderDataColumnCount() {
return readerColumnCount;
}
@Override
public void init(Configuration hconf)
throws SerDeException, HiveException {
VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc();
// This type information specifies the data types the partition needs to read.
TypeInfo[] dataTypeInfos = vectorPartDesc.getDataTypeInfos();
// We need to provide the minimum number of columns to be read so
// LazySimpleDeserializeRead's separator parser does not waste time.
Preconditions.checkState(dataColumnsToIncludeTruncated != null);
TypeInfo[] minimalDataTypeInfos;
if (dataColumnsToIncludeTruncated.length < dataTypeInfos.length) {
minimalDataTypeInfos =
Arrays.copyOf(dataTypeInfos, dataColumnsToIncludeTruncated.length);
} else {
minimalDataTypeInfos = dataTypeInfos;
}
readerColumnCount = minimalDataTypeInfos.length;
switch (vectorPartDesc.getVectorDeserializeType()) {
case LAZY_SIMPLE:
{
LazySerDeParameters simpleSerdeParams =
new LazySerDeParameters(hconf, partDesc.getTableDesc().getProperties(),
LazySimpleSerDe.class.getName());
LazySimpleDeserializeRead lazySimpleDeserializeRead =
new LazySimpleDeserializeRead(
minimalDataTypeInfos,
/* useExternalBuffer */ true,
simpleSerdeParams);
vectorDeserializeRow =
new VectorDeserializeRow<LazySimpleDeserializeRead>(lazySimpleDeserializeRead);
// Initialize with data row type conversion parameters.
vectorDeserializeRow.initConversion(tableRowTypeInfos, dataColumnsToIncludeTruncated);
deserializeRead = lazySimpleDeserializeRead;
}
break;
case LAZY_BINARY:
{
LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
new LazyBinaryDeserializeRead(
dataTypeInfos,
/* useExternalBuffer */ true);
vectorDeserializeRow =
new VectorDeserializeRow<LazyBinaryDeserializeRead>(lazyBinaryDeserializeRead);
// Initialize with data row type conversion parameters.
vectorDeserializeRow.initConversion(tableRowTypeInfos, dataColumnsToIncludeTruncated);
deserializeRead = lazyBinaryDeserializeRead;
}
break;
default:
throw new RuntimeException(
"Unexpected vector deserialize row type " + vectorPartDesc.getVectorDeserializeType().name());
}
}
}
/*
* Context for reading using the regular partition deserializer to get the row object and
* assigning the row object into the VectorizedRowBatch with VectorAssignRow
*/
protected class RowDeserializePartitionContext extends VectorPartitionContext {
private Deserializer partDeserializer;
private StructObjectInspector partRawRowObjectInspector;
private VectorAssignRow vectorAssign;
private int readerColumnCount;
private RowDeserializePartitionContext(PartitionDesc partDesc) {
super(partDesc);
}
public Deserializer getPartDeserializer() {
return partDeserializer;
}
public StructObjectInspector getPartRawRowObjectInspector() {
return partRawRowObjectInspector;
}
public VectorAssignRow getVectorAssign() {
return vectorAssign;
}
@Override
public int getReaderDataColumnCount() {
return readerColumnCount;
}
@Override
public void init(Configuration hconf)
throws Exception {
VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc();
partDeserializer = partDesc.getDeserializer(hconf);
if (partDeserializer instanceof OrcSerde) {
// UNDONE: We need to get the table schema inspector from self-describing Input File
// Formats like ORC. Modify the ORC serde instead? For now, this works.
partRawRowObjectInspector =
(StructObjectInspector) OrcStruct.createObjectInspector(tableStructTypeInfo);
} else {
partRawRowObjectInspector =
(StructObjectInspector) partDeserializer.getObjectInspector();
}
TypeInfo[] dataTypeInfos = vectorPartDesc.getDataTypeInfos();
vectorAssign = new VectorAssignRow();
// Initialize with data type conversion parameters.
readerColumnCount =
vectorAssign.initConversion(dataTypeInfos, tableRowTypeInfos, dataColumnsToIncludeTruncated);
}
}
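/*
* Create and initialize the kind of partition context that matches the partition's
* vectorized read type: vectorized input file format, vector deserialize, or row deserialize.
*/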
public VectorPartitionContext createAndInitPartitionContext(PartitionDesc partDesc,
Configuration hconf)
throws SerDeException, Exception {
VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc();
VectorPartitionContext vectorPartitionContext;
VectorMapOperatorReadType vectorMapOperatorReadType =
vectorPartDesc.getVectorMapOperatorReadType();
if (vectorMapOperatorReadType == VectorMapOperatorReadType.VECTOR_DESERIALIZE ||
vectorMapOperatorReadType == VectorMapOperatorReadType.ROW_DESERIALIZE) {
// Verify hive.exec.schema.evolution is true or we have an ACID table so we are producing
// the table schema from ORC. The Vectorizer class assures this.
boolean isAcid =
AcidUtils.isTablePropertyTransactional(partDesc.getTableDesc().getProperties());
Preconditions.checkState(Utilities.isSchemaEvolutionEnabled(hconf, isAcid));
}
switch (vectorMapOperatorReadType) {
case VECTORIZED_INPUT_FILE_FORMAT:
vectorPartitionContext = new VectorizedInputFileFormatPartitionContext(partDesc);
break;
case VECTOR_DESERIALIZE:
vectorPartitionContext = new VectorDeserializePartitionContext(partDesc);
break;
case ROW_DESERIALIZE:
vectorPartitionContext = new RowDeserializePartitionContext(partDesc);
break;
default:
throw new RuntimeException("Unexpected vector MapOperator read type " +
vectorMapOperatorReadType.name());
}
vectorPartitionContext.init(hconf);
return vectorPartitionContext;
}
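/*
* Form a boolean include array for the data columns, truncated just past the highest
* included column number so the vector/row deserializers do not parse trailing columns
* that are never read. Relies on dataColumnNums being in ascending order.
*/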
private void determineDataColumnsToIncludeTruncated() {
Preconditions.checkState(batchContext != null);
Preconditions.checkState(dataColumnNums != null);
boolean[] columnsToInclude = new boolean[dataColumnCount];
final int count = dataColumnNums.length;
int columnNum = -1;
for (int i = 0; i < count; i++) {
columnNum = dataColumnNums[i];
Preconditions.checkState(columnNum < dataColumnCount);
columnsToInclude[columnNum] = true;
}
if (columnNum == -1) {
dataColumnsToIncludeTruncated = new boolean[0];
} else {
dataColumnsToIncludeTruncated = Arrays.copyOf(columnsToInclude, columnNum + 1);
}
}
/** Kryo ctor. */
public VectorMapOperator() {
super();
}
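/** Standard ctor. */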
public VectorMapOperator(CompilationOpContext ctx) {
super(ctx);
}
/*
* This is the same as the setChildren method below but for empty tables.
*/
@Override
public void initEmptyInputChildren(List<Operator<?>> children, Configuration hconf)
throws SerDeException, Exception {
// Get the single TableScanOperator. Vectorization only supports one input tree.
Preconditions.checkState(children.size() == 1);
oneRootOperator = children.get(0);
internalSetChildren(hconf);
}
@Override
public void setChildren(Configuration hconf) throws Exception {
// Get the single TableScanOperator. Vectorization only supports one input tree.
Iterator<Operator<? extends OperatorDesc>> aliasToWorkIterator =
conf.getAliasToWork().values().iterator();
oneRootOperator = aliasToWorkIterator.next();
Preconditions.checkState(!aliasToWorkIterator.hasNext());
internalSetChildren(hconf);
}
/*
* Create information for vector map operator.
* The member oneRootOperator has been set.
*/
private void internalSetChildren(Configuration hconf) throws Exception {
// The setupPartitionContextVars uses the prior read type to flush the prior deserializerBatch,
// so set it here to none.
currentReadType = VectorMapOperatorReadType.NONE;
batchContext = conf.getVectorizedRowBatchCtx();
/*
* Use a different batch for vectorized Input File Format readers so they can do their work
* overlapped with work of the row collection that vector/row deserialization does. This allows
* the partitions to mix modes (e.g. for us to flush the previously batched rows on file change).
*/
vectorizedInputFileFormatBatch =
batchContext.createVectorizedRowBatch();
conf.setVectorizedRowBatch(vectorizedInputFileFormatBatch);
/*
* This batch is used by vector/row deserializer readers.
*/
deserializerBatch = batchContext.createVectorizedRowBatch();
batchCounter = 0;
dataColumnCount = batchContext.getDataColumnCount();
partitionColumnCount = batchContext.getPartitionColumnCount();
partitionValues = new Object[partitionColumnCount];
dataColumnNums = batchContext.getDataColumnNums();
Preconditions.checkState(dataColumnNums != null);
// Form a truncated boolean include array for our vector/row deserializers.
determineDataColumnsToIncludeTruncated();
/*
* Create table related objects
*/
final String[] rowColumnNames = batchContext.getRowColumnNames();
final TypeInfo[] rowColumnTypeInfos = batchContext.getRowColumnTypeInfos();
tableStructTypeInfo =
TypeInfoFactory.getStructTypeInfo(
Arrays.asList(rowColumnNames),
Arrays.asList(rowColumnTypeInfos));
tableStandardStructObjectInspector =
(StandardStructObjectInspector)
TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);
tableRowTypeInfos = batchContext.getRowColumnTypeInfos();
/*
* NOTE: We do not alter the projectedColumns / projectionSize of the batches to just be
* the included columns (+ partition columns).
*
* For now, we need to model the object inspector rows because there are still several
* vectorized operators that use them.
*
* We need to continue to model the Object[] as having null objects for not included columns
* until the following has been fixed:
* o When we have to output a STRUCT for AVG we switch to row GroupBy operators.
* o Some variations of VectorMapOperator, VectorReduceSinkOperator, VectorFileSinkOperator
* use the row super class to process rows.
*/
/*
* The Vectorizer class enforces that there is only one TableScanOperator, so
* we don't need the more complicated multiple root operator mapping that MapOperator has.
*/
fileToPartitionContextMap = new HashMap<String, VectorPartitionContext>();
// Temporary map so we create only one partition context per PartitionDesc.
HashMap<PartitionDesc, VectorPartitionContext> partitionContextMap =
new HashMap<PartitionDesc, VectorPartitionContext>();
for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
Path path = entry.getKey();
PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path);
VectorPartitionContext vectorPartitionContext;
if (!partitionContextMap.containsKey(partDesc)) {
vectorPartitionContext = createAndInitPartitionContext(partDesc, hconf);
partitionContextMap.put(partDesc, vectorPartitionContext);
} else {
vectorPartitionContext = partitionContextMap.get(partDesc);
}
fileToPartitionContextMap.put(path.toString(), vectorPartitionContext);
}
// Create list of one.
List<Operator<? extends OperatorDesc>> children =
new ArrayList<Operator<? extends OperatorDesc>>();
children.add(oneRootOperator);
setChildOperators(children);
}
@Override
public void initializeMapOperator(Configuration hconf) throws HiveException {
super.initializeMapOperator(hconf);
oneRootOperator.initialize(hconf, new ObjectInspector[] {tableStandardStructObjectInspector});
}
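/*
* Set up the partition context for the current input file.
*/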
public void initializeContexts() throws HiveException {
Path fpath = getExecContext().getCurrentInputPath();
String nominalPath = getNominalPath(fpath);
setupPartitionContextVars(nominalPath);
}
// Find context for current input file
@Override
public void cleanUpInputFileChangedOp() throws HiveException {
super.cleanUpInputFileChangedOp();
Path fpath = getExecContext().getCurrentInputPath();
String nominalPath = getNominalPath(fpath);
setupPartitionContextVars(nominalPath);
// Propagate the table name and partition name to the operator tree so that
// child operators will inherit them.
oneRootOperator.setInputContext(currentVectorPartContext.tableName,
currentVectorPartContext.partName);
}
/*
* Setup the context for reading from the next partition file.
*/
private void setupPartitionContextVars(String nominalPath) throws HiveException {
currentVectorPartContext = fileToPartitionContextMap.get(nominalPath);
PartitionDesc partDesc = currentVectorPartContext.getPartDesc();
VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc();
currentReadType = vectorPartDesc.getVectorMapOperatorReadType();
/*
* Setup for 3 different kinds of vectorized reading supported:
*
* 1) Read the Vectorized Input File Format which returns VectorizedRowBatch as the row.
*
* 2) Read using VectorDeserializeRow to deserialize each row into the VectorizedRowBatch.
*
* 3) And read using the regular partition deserializer to get the row object and assigning
* the row object into the VectorizedRowBatch with VectorAssignRow.
*/
if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {
/*
* The Vectorized Input File Format reader is responsible for setting the partition column
* values, resetting and filling in the batch, etc.
*/
/*
* Clear all the reading variables.
*/
currentDataColumnCount = 0;
currentDeserializeRead = null;
currentVectorDeserializeRow = null;
currentPartDeserializer = null;
currentPartRawRowObjectInspector = null;
currentVectorAssign = null;
} else {
/*
* We will get "regular" single rows from the Input File Format reader that we will need
* to {vector|row} deserialize.
*/
Preconditions.checkState(
currentReadType == VectorMapOperatorReadType.VECTOR_DESERIALIZE ||
currentReadType == VectorMapOperatorReadType.ROW_DESERIALIZE);
if (deserializerBatch.size > 0) {
/*
* Flush and then reset any rows batched from the previous partition, since we are
* about to change the repeating partition column values.
*/
batchCounter++;
oneRootOperator.process(deserializerBatch, 0);
deserializerBatch.reset();
if (oneRootOperator.getDone()) {
setDone(true);
return;
}
}
/*
* For this particular file, how many columns will we actually read?
*/
currentDataColumnCount = currentVectorPartContext.getReaderDataColumnCount();
if (currentDataColumnCount < dataColumnCount) {
/*
* Default any additional data columns to NULL once for the file (if they are present).
*/
for (int i = currentDataColumnCount; i < dataColumnCount; i++) {
ColumnVector colVector = deserializerBatch.cols[i];
if (colVector != null) {
colVector.isNull[0] = true;
colVector.noNulls = false;
colVector.isRepeating = true;
}
}
}
if (batchContext.getPartitionColumnCount() > 0) {
/*
* The partition columns are set once for the partition and are marked repeating.
*/
VectorizedRowBatchCtx.getPartitionValues(batchContext, partDesc, partitionValues);
batchContext.addPartitionColsToBatch(deserializerBatch, partitionValues);
}
/*
* Set or clear the rest of the reading variables based on {vector|row} deserialization.
*/
switch (currentReadType) {
case VECTOR_DESERIALIZE:
{
VectorDeserializePartitionContext vectorDeserPartContext =
(VectorDeserializePartitionContext) currentVectorPartContext;
// Set ours.
currentDeserializeRead = vectorDeserPartContext.getDeserializeRead();
currentVectorDeserializeRow = vectorDeserPartContext.getVectorDeserializeRow();
// Clear the other ones.
currentPartDeserializer = null;
currentPartRawRowObjectInspector = null;
currentVectorAssign = null;
}
break;
case ROW_DESERIALIZE:
{
RowDeserializePartitionContext rowDeserPartContext =
(RowDeserializePartitionContext) currentVectorPartContext;
// Clear the other ones.
currentDeserializeRead = null;
currentVectorDeserializeRow = null;
// Set ours.
currentPartDeserializer = rowDeserPartContext.getPartDeserializer();
currentPartRawRowObjectInspector = rowDeserPartContext.getPartRawRowObjectInspector();
currentVectorAssign = rowDeserPartContext.getVectorAssign();
}
break;
default:
throw new RuntimeException("Unexpected VectorMapOperator read type " +
currentReadType.name());
}
}
}
@Override
public Deserializer getCurrentDeserializer() {
// Not applicable.
return null;
}
@Override
public void process(Writable value) throws HiveException {
// A mapper can span multiple files/partitions.
// The VectorPartitionContext needs to be changed if the input file has changed.
ExecMapperContext context = getExecContext();
if (context != null && context.inputFileChanged()) {
// The child operators clean up when the input file changes.
cleanUpInputFileChanged();
}
if (!oneRootOperator.getDone()) {
/*
* 3 different kinds of vectorized reading supported:
*
* 1) Read the Vectorized Input File Format which returns VectorizedRowBatch as the row.
*
* 2) Read using VectorDeserializeRow to deserialize each row into the VectorizedRowBatch.
*
* 3) And read using the regular partition deserializer to get the row object and assigning
* the row object into the VectorizedRowBatch with VectorAssignRow.
*/
try {
if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {
/*
* The Vectorized Input File Format reader has already set the partition column
* values, reset and filled in the batch, etc.
*
* We pass the VectorizedRowBatch through here.
*/
batchCounter++;
if (value != null) {
numRows += ((VectorizedRowBatch) value).size;
}
oneRootOperator.process(value, 0);
if (oneRootOperator.getDone()) {
setDone(true);
return;
}
} else {
/*
* We have "regular" single rows from the Input File Format reader that we need
* to deserialize.
*/
Preconditions.checkState(
currentReadType == VectorMapOperatorReadType.VECTOR_DESERIALIZE ||
currentReadType == VectorMapOperatorReadType.ROW_DESERIALIZE);
if (deserializerBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
numRows += deserializerBatch.size;
/*
* Feed current full batch to operator tree.
*/
batchCounter++;
oneRootOperator.process(deserializerBatch, 0);
/*
* Only reset the data columns the current reader supplies. Do not reset the data
* columns defaulted to NULL (they are not present in this partition) nor the
* partition columns.
*/
for (int c = 0; c < currentDataColumnCount; c++) {
ColumnVector colVector = deserializerBatch.cols[c];
if (colVector != null) {
colVector.reset();
colVector.init();
}
}
deserializerBatch.selectedInUse = false;
deserializerBatch.size = 0;
deserializerBatch.endOfFile = false;
if (oneRootOperator.getDone()) {
setDone(true);
return;
}
}
/*
* Do the {vector|row} deserialization of the one row into the VectorizedRowBatch.
*/
switch (currentReadType) {
case VECTOR_DESERIALIZE:
{
BinaryComparable binComp = (BinaryComparable) value;
currentDeserializeRead.set(binComp.getBytes(), 0, binComp.getLength());
// Deserialize and append new row using the current batch size as the index.
try {
currentVectorDeserializeRow.deserialize(
deserializerBatch, deserializerBatch.size++);
} catch (Exception e) {
throw new HiveException(
"\nDeserializeRead detail: " +
currentVectorDeserializeRow.getDetailedReadPositionString(),
e);
}
}
break;
case ROW_DESERIALIZE:
{
Object deserialized = currentPartDeserializer.deserialize(value);
// Note: Regardless of what the Input File Format returns, we have determined
// with VectorAssignRow.initConversion that only currentDataColumnCount columns
// have values we want.
//
// Any extra columns needed by the table schema were set to repeating null
// in the batch by setupPartitionContextVars.
// Convert input row to standard objects.
List<Object> standardObjects = new ArrayList<Object>();
ObjectInspectorUtils.copyToStandardObject(standardObjects, deserialized,
currentPartRawRowObjectInspector, ObjectInspectorCopyOption.WRITABLE);
if (standardObjects.size() < currentDataColumnCount) {
throw new HiveException("Input File Format returned row with too few columns");
}
// Append the deserialized standard object row using the current batch size
// as the index.
currentVectorAssign.assignRow(deserializerBatch, deserializerBatch.size++,
standardObjects, currentDataColumnCount);
}
break;
default:
throw new RuntimeException("Unexpected vector MapOperator read type " +
currentReadType.name());
}
}
} catch (Exception e) {
throw new HiveException("Hive Runtime Error while processing row ", e);
}
}
}
@Override
public void process(Object row, int tag) throws HiveException {
throw new HiveException("Hive 2 Internal error: should not be called!");
}
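/*
* On close, flush any final partial batch held by the vector/row deserialize reading modes.
*/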
@Override
public void closeOp(boolean abort) throws HiveException {
if (!abort && oneRootOperator != null && !oneRootOperator.getDone() &&
currentReadType != VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {
if (deserializerBatch.size > 0) {
numRows += deserializerBatch.size;
batchCounter++;
oneRootOperator.process(deserializerBatch, 0);
deserializerBatch.size = 0;
}
}
super.closeOp(abort);
}
@Override
public String getName() {
return getOperatorName();
}
static public String getOperatorName() {
return "MAP";
}
@Override
public OperatorType getType() {
return null;
}
}