/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.exec.vector;

import java.io.IOException;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hive.common.util.DateUtils;

public class VectorizedBatchUtil {
  private static final Logger LOG = LoggerFactory.getLogger(VectorizedBatchUtil.class);

  /**
   * Sets the isNull value for the ColumnVector at the specified index.
   * @param cv
   * @param rowIndex
   */
  public static void setNullColIsNullValue(ColumnVector cv, int rowIndex) {
    cv.isNull[rowIndex] = true;
    if (cv.noNulls) {
      cv.noNulls = false;
    }
  }

  /**
   * Iterates through all the column vectors and sets noNulls to true.
   *
   * @param batch
   *          Batch on which noNulls is set
   */
  public static void setNoNullFields(VectorizedRowBatch batch) {
    for (int i = 0; i < batch.numCols; i++) {
      batch.cols[i].noNulls = true;
    }
  }

  /**
   * Marks the specified column of the batch as repeating.
   */
  public static void setRepeatingColumn(VectorizedRowBatch batch, int column) {
    ColumnVector cv = batch.cols[column];
    cv.isRepeating = true;
  }

  /**
   * Reduces the batch size of a vectorized row batch.
   */
  public static void setBatchSize(VectorizedRowBatch batch, int size) {
    assert (size <= batch.getMaxSize());
    batch.size = size;
  }
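
  // Illustrative usage sketch (not part of the original class): the helpers above are
  // typically combined when preparing a scratch batch by hand. The two-column layout and
  // the batch size of 100 rows below are arbitrary assumptions for the example.
  private static void exampleBatchHelperUsage() {
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    batch.cols[0] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    batch.cols[1] = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    setNoNullFields(batch);        // declare that neither column contains NULLs
    setRepeatingColumn(batch, 0);  // column 0 repeats its row-0 value for every row
    setBatchSize(batch, 100);      // logically shrink the batch to 100 rows
  }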

  public static ColumnVector createColumnVector(String typeName) {
    typeName = typeName.toLowerCase();

    // Allow undecorated CHAR and VARCHAR to support scratch column type names.
    if (typeName.equals("char") || typeName.equals("varchar")) {
      return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    }

    TypeInfo typeInfo = (TypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
    return createColumnVector(typeInfo);
  }

  public static ColumnVector createColumnVector(TypeInfo typeInfo) {
    switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      {
        PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
        switch (primitiveTypeInfo.getPrimitiveCategory()) {
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
        case DATE:
        case INTERVAL_YEAR_MONTH:
          return new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        case TIMESTAMP:
          return new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        case INTERVAL_DAY_TIME:
          return new IntervalDayTimeColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        case FLOAT:
        case DOUBLE:
          return new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        case BINARY:
        case STRING:
        case CHAR:
        case VARCHAR:
          return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        case DECIMAL:
          DecimalTypeInfo tInfo = (DecimalTypeInfo) primitiveTypeInfo;
          return new DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
              tInfo.precision(), tInfo.scale());
        default:
          throw new RuntimeException("Vectorization is not supported for datatype:"
              + primitiveTypeInfo.getPrimitiveCategory());
        }
      }
    case STRUCT:
      {
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        List<TypeInfo> typeInfoList = structTypeInfo.getAllStructFieldTypeInfos();
        ColumnVector[] children = new ColumnVector[typeInfoList.size()];
        for (int i = 0; i < children.length; ++i) {
          children[i] = createColumnVector(typeInfoList.get(i));
        }
        return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, children);
      }
    case UNION:
      {
        UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
        List<TypeInfo> typeInfoList = unionTypeInfo.getAllUnionObjectTypeInfos();
        ColumnVector[] children = new ColumnVector[typeInfoList.size()];
        for (int i = 0; i < children.length; ++i) {
          children[i] = createColumnVector(typeInfoList.get(i));
        }
        return new UnionColumnVector(VectorizedRowBatch.DEFAULT_SIZE, children);
      }
    case LIST:
      {
        ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
        return new ListColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
            createColumnVector(listTypeInfo.getListElementTypeInfo()));
      }
    case MAP:
      {
        MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
        return new MapColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
            createColumnVector(mapTypeInfo.getMapKeyTypeInfo()),
            createColumnVector(mapTypeInfo.getMapValueTypeInfo()));
      }
    default:
      throw new RuntimeException("Vectorization is not supported for datatype:"
          + typeInfo.getCategory());
    }
  }
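
  // Illustrative usage sketch (not part of the original class): createColumnVector picks the
  // ColumnVector subclass from the Hive type. The type names below are arbitrary assumptions.
  private static VectorizedRowBatch exampleCreateScratchBatch() {
    VectorizedRowBatch batch = new VectorizedRowBatch(3);
    batch.cols[0] = createColumnVector("bigint");         // LongColumnVector
    batch.cols[1] = createColumnVector("decimal(10,2)");  // DecimalColumnVector, precision 10, scale 2
    batch.cols[2] = createColumnVector(
        TypeInfoUtils.getTypeInfoFromTypeString("array<string>"));  // ListColumnVector over BytesColumnVector
    return batch;
  }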

  /**
   * Iterates through all the columns in a given row and populates the batch
   * from a given offset.
   *
   * @param row Deserialized row object
   * @param oi Object inspector for that row
   * @param rowIndex index at which the row should be added to the batch
   * @param colOffset offset from where the column begins
   * @param batch Vectorized batch to which the row is added at rowIndex
   * @throws HiveException
   */
  public static void addRowToBatchFrom(Object row, StructObjectInspector oi,
                                       int rowIndex,
                                       int colOffset,
                                       VectorizedRowBatch batch,
                                       DataOutputBuffer buffer) throws HiveException {
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    final int off = colOffset;
    // Iterate through the columns and load the batch
    for (int i = 0; i < fieldRefs.size(); i++) {
      setVector(row, oi, fieldRefs.get(i), batch, buffer, rowIndex, i, off);
    }
  }

  /**
   * Adds only the projected columns of a regular row to the specified vectorized row batch.
   * @param row the regular row
   * @param oi object inspector for the row
   * @param rowIndex the offset to add in the batch
   * @param batch vectorized row batch
   * @param buffer data output buffer
   * @throws HiveException
   */
  public static void addProjectedRowToBatchFrom(Object row, StructObjectInspector oi,
      int rowIndex, VectorizedRowBatch batch, DataOutputBuffer buffer) throws HiveException {
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    for (int i = 0; i < fieldRefs.size(); i++) {
      int projectedOutputCol = batch.projectedColumns[i];
      if (batch.cols[projectedOutputCol] == null) {
        continue;
      }
      setVector(row, oi, fieldRefs.get(i), batch, buffer, rowIndex, projectedOutputCol, 0);
    }
  }

  /**
   * Iterates through all the columns in a given row and populates the batch
   * from a given offset.
   *
   * @param row Deserialized row object
   * @param oi Object inspector for that row
   * @param rowIndex index at which the row should be added to the batch
   * @param batch Vectorized batch to which the row is added at rowIndex
   * @param context context object for this vectorized batch
   * @param buffer
   * @throws HiveException
   */
  public static void acidAddRowToBatch(Object row,
                                       StructObjectInspector oi,
                                       int rowIndex,
                                       VectorizedRowBatch batch,
                                       VectorizedRowBatchCtx context,
                                       DataOutputBuffer buffer) throws HiveException {
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    // Iterate through the columns and load the batch
    for (int i = 0; i < fieldRefs.size(); i++) {
      if (batch.cols[i] == null) {
        // This means the column was not included in the projection from the underlying read
        continue;
      }
      if (context.isPartitionCol(i)) {
        // The value will have already been set before we're called, so don't overwrite it
        continue;
      }
      setVector(row, oi, fieldRefs.get(i), batch, buffer, rowIndex, i, 0);
    }
  }
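
  // Illustrative usage sketch (not part of the original class): filling a batch row by row
  // with addRowToBatchFrom. The rows and rowOI parameters are assumed to come from the
  // caller's deserializer; the DataOutputBuffer backs the bytes referenced by string,
  // char/varchar and binary columns.
  private static void exampleFillBatch(List<Object> rows, StructObjectInspector rowOI,
      VectorizedRowBatch batch, DataOutputBuffer buffer) throws HiveException {
    buffer.reset();
    for (Object row : rows) {
      addRowToBatchFrom(row, rowOI, batch.size, 0, batch, buffer);
      batch.size++;
      if (batch.size == batch.getMaxSize()) {
        break;  // batch is full; a real caller would forward it downstream and reset it
      }
    }
  }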

  private static void setVector(Object row,
                                StructObjectInspector oi,
                                StructField field,
                                VectorizedRowBatch batch,
                                DataOutputBuffer buffer,
                                int rowIndex,
                                int colIndex,
                                int offset) throws HiveException {

    Object fieldData = oi.getStructFieldData(row, field);
    ObjectInspector foi = field.getFieldObjectInspector();

    // Vectorization only supports PRIMITIVE data types. Assert the same
    assert (foi.getCategory() == Category.PRIMITIVE);

    // Get writable object
    PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
    Object writableCol = poi.getPrimitiveWritableObject(fieldData);

    // NOTE: The default value for null fields in vectorization is 1 for int types, NaN for
    // float/double. String types have no default value for null.
    switch (poi.getPrimitiveCategory()) {
    case BOOLEAN: {
      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
        lcv.isNull[rowIndex] = false;
      } else {
        lcv.vector[rowIndex] = 1;
        setNullColIsNullValue(lcv, rowIndex);
      }
    }
      break;
    case BYTE: {
      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
        lcv.isNull[rowIndex] = false;
      } else {
        lcv.vector[rowIndex] = 1;
        setNullColIsNullValue(lcv, rowIndex);
      }
    }
      break;
    case SHORT: {
      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
        lcv.isNull[rowIndex] = false;
      } else {
        lcv.vector[rowIndex] = 1;
        setNullColIsNullValue(lcv, rowIndex);
      }
    }
      break;
    case INT: {
      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
        lcv.isNull[rowIndex] = false;
      } else {
        lcv.vector[rowIndex] = 1;
        setNullColIsNullValue(lcv, rowIndex);
      }
    }
      break;
    case LONG: {
      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
        lcv.isNull[rowIndex] = false;
      } else {
        lcv.vector[rowIndex] = 1;
        setNullColIsNullValue(lcv, rowIndex);
      }
    }
      break;
    case DATE: {
      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
        lcv.isNull[rowIndex] = false;
      } else {
        lcv.vector[rowIndex] = 1;
        setNullColIsNullValue(lcv, rowIndex);
      }
    }
      break;
    case FLOAT: {
      DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
        dcv.isNull[rowIndex] = false;
      } else {
        dcv.vector[rowIndex] = Double.NaN;
        setNullColIsNullValue(dcv, rowIndex);
      }
    }
      break;
    case DOUBLE: {
      DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
        dcv.isNull[rowIndex] = false;
      } else {
        dcv.vector[rowIndex] = Double.NaN;
        setNullColIsNullValue(dcv, rowIndex);
      }
    }
      break;
    case TIMESTAMP: {
      TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        lcv.set(rowIndex, ((TimestampWritable) writableCol).getTimestamp());
        lcv.isNull[rowIndex] = false;
      } else {
        lcv.setNullValue(rowIndex);
        setNullColIsNullValue(lcv, rowIndex);
      }
    }
      break;
    case INTERVAL_YEAR_MONTH: {
      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        HiveIntervalYearMonth i = ((HiveIntervalYearMonthWritable) writableCol).getHiveIntervalYearMonth();
        lcv.vector[rowIndex] = i.getTotalMonths();
        lcv.isNull[rowIndex] = false;
      } else {
        lcv.vector[rowIndex] = 1;
        setNullColIsNullValue(lcv, rowIndex);
      }
    }
      break;
    case INTERVAL_DAY_TIME: {
      IntervalDayTimeColumnVector icv = (IntervalDayTimeColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        HiveIntervalDayTime idt = ((HiveIntervalDayTimeWritable) writableCol).getHiveIntervalDayTime();
        icv.set(rowIndex, idt);
        icv.isNull[rowIndex] = false;
      } else {
        icv.setNullValue(rowIndex);
        setNullColIsNullValue(icv, rowIndex);
      }
    }
      break;
    case BINARY: {
      BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        bcv.isNull[rowIndex] = false;
        BytesWritable bw = (BytesWritable) writableCol;
        byte[] bytes = bw.getBytes();
        int start = buffer.getLength();
        int length = bw.getLength();
        try {
          buffer.write(bytes, 0, length);
        } catch (IOException ioe) {
          throw new IllegalStateException("bad write", ioe);
        }
        bcv.setRef(rowIndex, buffer.getData(), start, length);
      } else {
        setNullColIsNullValue(bcv, rowIndex);
      }
    }
      break;
    case STRING: {
      BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        bcv.isNull[rowIndex] = false;
        Text colText = (Text) writableCol;
        int start = buffer.getLength();
        int length = colText.getLength();
        try {
          buffer.write(colText.getBytes(), 0, length);
        } catch (IOException ioe) {
          throw new IllegalStateException("bad write", ioe);
        }
        bcv.setRef(rowIndex, buffer.getData(), start, length);
      } else {
        setNullColIsNullValue(bcv, rowIndex);
      }
    }
      break;
    case CHAR: {
      BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        bcv.isNull[rowIndex] = false;
        HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
        byte[] bytes = colHiveChar.getStrippedValue().getBytes();

        // We assume the CHAR maximum length was enforced when the object was created.
        int length = bytes.length;

        int start = buffer.getLength();
        try {
          // In vector mode, we store CHAR as unpadded.
          buffer.write(bytes, 0, length);
        } catch (IOException ioe) {
          throw new IllegalStateException("bad write", ioe);
        }
        bcv.setRef(rowIndex, buffer.getData(), start, length);
      } else {
        setNullColIsNullValue(bcv, rowIndex);
      }
    }
      break;
    case VARCHAR: {
      BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        bcv.isNull[rowIndex] = false;
        HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
        byte[] bytes = colHiveVarchar.getValue().getBytes();

        // We assume the VARCHAR maximum length was enforced when the object was created.
        int length = bytes.length;

        int start = buffer.getLength();
        try {
          buffer.write(bytes, 0, length);
        } catch (IOException ioe) {
          throw new IllegalStateException("bad write", ioe);
        }
        bcv.setRef(rowIndex, buffer.getData(), start, length);
      } else {
        setNullColIsNullValue(bcv, rowIndex);
      }
    }
      break;
    case DECIMAL:
      DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        dcv.isNull[rowIndex] = false;
        HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
        dcv.set(rowIndex, wobj);
      } else {
        setNullColIsNullValue(dcv, rowIndex);
      }
      break;
    default:
      throw new HiveException("Vectorization is not supported for datatype:" +
          poi.getPrimitiveCategory());
    }
  }

  public static StandardStructObjectInspector convertToStandardStructObjectInspector(
      StructObjectInspector structObjectInspector) throws HiveException {

    List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
    List<ObjectInspector> oids = new ArrayList<ObjectInspector>();
    ArrayList<String> columnNames = new ArrayList<String>();

    for (StructField field : fields) {
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(
          field.getFieldObjectInspector().getTypeName());
      ObjectInspector standardWritableObjectInspector =
          TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
      oids.add(standardWritableObjectInspector);
      columnNames.add(field.getFieldName());
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, oids);
  }

  public static String[] columnNamesFromStructObjectInspector(
      StructObjectInspector structObjectInspector) throws HiveException {

    List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
    String[] result = new String[fields.size()];

    int i = 0;
    for (StructField field : fields) {
      result[i++] = field.getFieldName();
    }
    return result;
  }

  public static TypeInfo[] typeInfosFromTypeNames(String[] typeNames) throws HiveException {
    ArrayList<TypeInfo> typeInfoList =
        TypeInfoUtils.typeInfosFromTypeNames(Arrays.asList(typeNames));
    return typeInfoList.toArray(new TypeInfo[0]);
  }

  public static TypeInfo[] typeInfosFromStructObjectInspector(
      StructObjectInspector structObjectInspector) {
    ArrayList<TypeInfo> typeInfoList =
        TypeInfoUtils.typeInfosFromStructObjectInspector(structObjectInspector);
    return typeInfoList.toArray(new TypeInfo[0]);
  }

  static ColumnVector cloneColumnVector(ColumnVector source) throws HiveException {
    if (source instanceof LongColumnVector) {
      return new LongColumnVector(((LongColumnVector) source).vector.length);
    } else if (source instanceof DoubleColumnVector) {
      return new DoubleColumnVector(((DoubleColumnVector) source).vector.length);
    } else if (source instanceof BytesColumnVector) {
      return new BytesColumnVector(((BytesColumnVector) source).vector.length);
    } else if (source instanceof DecimalColumnVector) {
      DecimalColumnVector decColVector = (DecimalColumnVector) source;
      return new DecimalColumnVector(decColVector.vector.length,
          decColVector.precision,
          decColVector.scale);
    } else if (source instanceof TimestampColumnVector) {
      return new TimestampColumnVector(((TimestampColumnVector) source).getLength());
    } else if (source instanceof IntervalDayTimeColumnVector) {
      return new IntervalDayTimeColumnVector(((IntervalDayTimeColumnVector) source).getLength());
    } else if (source instanceof ListColumnVector) {
      ListColumnVector src = (ListColumnVector) source;
      ColumnVector child = cloneColumnVector(src.child);
      return new ListColumnVector(src.offsets.length, child);
    } else if (source instanceof MapColumnVector) {
      MapColumnVector src = (MapColumnVector) source;
      ColumnVector keys = cloneColumnVector(src.keys);
      ColumnVector values = cloneColumnVector(src.values);
      return new MapColumnVector(src.offsets.length, keys, values);
    } else if (source instanceof StructColumnVector) {
      StructColumnVector src = (StructColumnVector) source;
      ColumnVector[] copy = new ColumnVector[src.fields.length];
      for (int i = 0; i < copy.length; ++i) {
        copy[i] = cloneColumnVector(src.fields[i]);
      }
      return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, copy);
    } else if (source instanceof UnionColumnVector) {
      UnionColumnVector src = (UnionColumnVector) source;
      ColumnVector[] copy = new ColumnVector[src.fields.length];
      for (int i = 0; i < copy.length; ++i) {
        copy[i] = cloneColumnVector(src.fields[i]);
      }
      return new UnionColumnVector(src.tags.length, copy);
    } else {
      throw new HiveException("Column vector class " +
          source.getClass().getName() +
          " is not supported!");
    }
  }

  /**
   * Make a new (scratch) batch, which is exactly "like" the batch provided, except that it's empty
   * @param batch the batch to imitate
   * @return the new batch
   * @throws HiveException
   */
  public static VectorizedRowBatch makeLike(VectorizedRowBatch batch) throws HiveException {
    VectorizedRowBatch newBatch = new VectorizedRowBatch(batch.numCols);
    for (int i = 0; i < batch.numCols; i++) {
      if (batch.cols[i] != null) {
        newBatch.cols[i] = cloneColumnVector(batch.cols[i]);
        newBatch.cols[i].init();
      }
    }
    newBatch.projectedColumns = Arrays.copyOf(batch.projectedColumns,
        batch.projectedColumns.length);
    newBatch.projectionSize = batch.projectionSize;
    newBatch.reset();
    return newBatch;
  }
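
  // Illustrative usage sketch (not part of the original class): makeLike creates an empty
  // batch with the same column layout and projection as an existing one, which is useful
  // when an operator needs a scratch/output batch shaped like its input batch.
  private static VectorizedRowBatch exampleScratchCopy(VectorizedRowBatch original)
      throws HiveException {
    VectorizedRowBatch scratch = makeLike(original);
    // Same numCols and projectedColumns as 'original', but reset: size is 0 and the
    // column vectors are freshly allocated rather than shared with 'original'.
    return scratch;
  }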

  public static Writable getPrimitiveWritable(PrimitiveCategory primitiveCategory) {
    switch (primitiveCategory) {
    case VOID:
      return null;
    case BOOLEAN:
      return new BooleanWritable(false);
    case BYTE:
      return new ByteWritable((byte) 0);
    case SHORT:
      return new ShortWritable((short) 0);
    case INT:
      return new IntWritable(0);
    case LONG:
      return new LongWritable(0);
    case TIMESTAMP:
      return new TimestampWritable(new Timestamp(0));
    case DATE:
      return new DateWritable(new Date(0));
    case FLOAT:
      return new FloatWritable(0);
    case DOUBLE:
      return new DoubleWritable(0);
    case BINARY:
      return new BytesWritable(ArrayUtils.EMPTY_BYTE_ARRAY);
    case STRING:
      return new Text(ArrayUtils.EMPTY_BYTE_ARRAY);
    case VARCHAR:
      return new HiveVarcharWritable(new HiveVarchar(StringUtils.EMPTY, -1));
    case CHAR:
      return new HiveCharWritable(new HiveChar(StringUtils.EMPTY, -1));
    case DECIMAL:
      return new HiveDecimalWritable();
    case INTERVAL_YEAR_MONTH:
      return new HiveIntervalYearMonthWritable();
    case INTERVAL_DAY_TIME:
      return new HiveIntervalDayTimeWritable();
    default:
      throw new RuntimeException("Primitive category " + primitiveCategory.name() +
          " not supported");
    }
  }
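
  // Illustrative usage sketch (not part of the original class): getPrimitiveWritable returns
  // a freshly constructed, zero/empty-valued writable for a primitive category, which is
  // handy for preallocating reusable output objects.
  private static void examplePrimitiveWritables() {
    Writable intValue = getPrimitiveWritable(PrimitiveCategory.INT);       // IntWritable(0)
    Writable stringValue = getPrimitiveWritable(PrimitiveCategory.STRING); // empty Text
    assert intValue instanceof IntWritable;
    assert stringValue instanceof Text;
  }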

  public static String displayBytes(byte[] bytes, int start, int length) {
    StringBuilder sb = new StringBuilder();
    for (int i = start; i < start + length; i++) {
      char ch = (char) bytes[i];
      if (ch < ' ' || ch > '~') {
        sb.append(String.format("\\%03d", bytes[i] & 0xff));
      } else {
        sb.append(ch);
      }
    }
    return sb.toString();
  }

  public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, String prefix) {
    StringBuilder sb = new StringBuilder();
    LOG.info(debugFormatOneRow(batch, index, prefix, sb).toString());
  }

  public static StringBuilder debugFormatOneRow(VectorizedRowBatch batch,
      int index, String prefix, StringBuilder sb) {
    sb.append(prefix + " row " + index + " ");
    for (int p = 0; p < batch.projectionSize; p++) {
      int column = batch.projectedColumns[p];
      sb.append("(" + p + "," + column + ") ");
      ColumnVector colVector = batch.cols[column];
      if (colVector == null) {
        sb.append("(null ColumnVector)");
      } else {
        boolean isRepeating = colVector.isRepeating;
        if (isRepeating) {
          sb.append("(repeating)");
        }
        index = (isRepeating ? 0 : index);
        if (colVector.noNulls || !colVector.isNull[index]) {
          if (colVector instanceof LongColumnVector) {
            sb.append(((LongColumnVector) colVector).vector[index]);
          } else if (colVector instanceof DoubleColumnVector) {
            sb.append(((DoubleColumnVector) colVector).vector[index]);
          } else if (colVector instanceof BytesColumnVector) {
            BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
            byte[] bytes = bytesColumnVector.vector[index];
            int start = bytesColumnVector.start[index];
            int length = bytesColumnVector.length[index];
            if (bytes == null) {
              sb.append("(Unexpected null bytes with start " + start + " length " + length + ")");
            } else {
              sb.append("bytes: '" + displayBytes(bytes, start, length) + "'");
            }
          } else if (colVector instanceof DecimalColumnVector) {
            sb.append(((DecimalColumnVector) colVector).vector[index].toString());
          } else if (colVector instanceof TimestampColumnVector) {
            Timestamp timestamp = new Timestamp(0);
            ((TimestampColumnVector) colVector).timestampUpdate(timestamp, index);
            sb.append(timestamp.toString());
          } else if (colVector instanceof IntervalDayTimeColumnVector) {
            HiveIntervalDayTime intervalDayTime =
                ((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(index);
            sb.append(intervalDayTime.toString());
          } else {
            sb.append("Unknown");
          }
        } else {
          sb.append("NULL");
        }
      }
      sb.append(" ");
    }
    return sb;
  }

  public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) {
    for (int i = 0; i < batch.size; i++) {
      int index = (batch.selectedInUse ? batch.selected[i] : i);
      debugDisplayOneRow(batch, index, prefix);
    }
  }
}