/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec.vector;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
/**
 * This class serializes columns from a row in a VectorizedRowBatch into a serialization format.
 *
 * The caller provides the Hive type names and column numbers in the order desired for
 * serialization.
 *
 * This class uses a provided SerializeWrite object to serialize directly, writing
 * field-by-field into the serialization format from the primitive values of the
 * VectorizedRowBatch.
 *
 * Note that when serializing a row, the logical mapping of the batch's selected array
 * (when selectedInUse is true) must already have been performed by the caller; the
 * batchIndex parameter is the physical row index.
 */
public final class VectorSerializeRow<T extends SerializeWrite> {
private T serializeWrite;
private TypeInfo[] typeInfos;
private ObjectInspector[] objectInspectors;
private int[] outputColumnNums;
private VectorExtractRow vectorExtractRow;
public VectorSerializeRow(T serializeWrite) {
this();
this.serializeWrite = serializeWrite;
vectorExtractRow = new VectorExtractRow();
}
// Not public since a SerializeWrite object must always be provided.
private VectorSerializeRow() {
}
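/*
 * A minimal usage sketch (hedged): the surrounding setup is an assumption for
 * illustration, not part of this class. BinarySortableSerializeWrite is one existing
 * SerializeWrite implementation; any other implementation is driven the same way.
 *
 *   VectorSerializeRow<BinarySortableSerializeWrite> serializeRow =
 *       new VectorSerializeRow<>(new BinarySortableSerializeWrite(typeNames.size()));
 *   serializeRow.init(typeNames);               // or init(typeNames, columnMap)
 *
 *   Output output = new Output();
 *   serializeRow.setOutput(output);             // bind and reset the buffer
 *   serializeRow.serializeWrite(batch, batchIndex);
 *   // serialized bytes are in output.getData(), length output.getLength()
 */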
public void init(List<String> typeNames, int[] columnMap) throws HiveException {
final int size = typeNames.size();
typeInfos = new TypeInfo[size];
outputColumnNums = Arrays.copyOf(columnMap, size);
objectInspectors = new ObjectInspector[size];
for (int i = 0; i < size; i++) {
final TypeInfo typeInfo =
TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i));
typeInfos[i] = typeInfo;
objectInspectors[i] =
TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
}
vectorExtractRow.init(typeInfos, outputColumnNums);
}
public void init(List<String> typeNames) throws HiveException {
final int size = typeNames.size();
typeInfos = new TypeInfo[size];
outputColumnNums = new int[size];
objectInspectors = new ObjectInspector[size];
for (int i = 0; i < size; i++) {
final TypeInfo typeInfo =
TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i));
typeInfos[i] = typeInfo;
objectInspectors[i] =
TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
outputColumnNums[i] = i;
}
vectorExtractRow.init(typeInfos);
}
public void init(TypeInfo[] typeInfos, int[] columnMap)
throws HiveException {
final int size = typeInfos.length;
this.typeInfos = Arrays.copyOf(typeInfos, size);
outputColumnNums = Arrays.copyOf(columnMap, size);
objectInspectors = new ObjectInspector[size];
for (int i = 0; i < size; i++) {
objectInspectors[i] =
TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfos[i]);
}
vectorExtractRow.init(this.typeInfos, outputColumnNums);
}
public int getCount() {
return typeInfos.length;
}
public void setOutput(Output output) {
serializeWrite.set(output);
}
public void setOutputAppend(Output output) {
serializeWrite.setAppend(output);
}
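/*
 * setOutput resets the buffer before writing; setOutputAppend preserves its current
 * contents. Sketch (an assumption for illustration): two rows appended back-to-back
 * into one reused buffer.
 *
 *   serializeRow.setOutput(output);             // row 1 starts at offset 0
 *   serializeRow.serializeWrite(batch, 0);
 *   int row2Start = output.getLength();
 *   serializeRow.setOutputAppend(output);       // row 2 written after row 1
 *   serializeRow.serializeWrite(batch, 1);
 */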
// Per-row null tracking, set by serializeWrite(batch, batchIndex) below.
private boolean hasAnyNulls;
private boolean isAllNulls;
/*
 * Note that when serializing a row, the logical mapping of the batch's selected array
 * (when selectedInUse is true) has already been performed by the caller; batchIndex is
 * the physical index of the row.
 */
public void serializeWrite(VectorizedRowBatch batch, int batchIndex) throws IOException {
hasAnyNulls = false;
isAllNulls = true;
for (int i = 0; i < typeInfos.length; i++) {
final ColumnVector colVector = batch.cols[outputColumnNums[i]];
serializeWrite(colVector, typeInfos[i], objectInspectors[i], batchIndex);
}
}
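/*
 * Caller-side sketch showing the logical-to-physical mapping noted above (the loop
 * itself is an assumption for illustration, not part of this class):
 *
 *   for (int logical = 0; logical < batch.size; logical++) {
 *     final int batchIndex =
 *         batch.selectedInUse ? batch.selected[logical] : logical;
 *     serializeRow.serializeWrite(batch, batchIndex);
 *   }
 */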
private void serializeWrite(
ColumnVector colVector, TypeInfo typeInfo,
ObjectInspector objectInspector, int batchIndex) throws IOException {
int adjustedBatchIndex;
if (colVector.isRepeating) {
adjustedBatchIndex = 0;
} else {
adjustedBatchIndex = batchIndex;
}
if (!colVector.noNulls && colVector.isNull[adjustedBatchIndex]) {
serializeWrite.writeNull();
hasAnyNulls = true;
return;
}
isAllNulls = false;
final Category category = typeInfo.getCategory();
switch (category) {
case PRIMITIVE:
serializePrimitiveWrite(colVector, (PrimitiveTypeInfo) typeInfo, adjustedBatchIndex);
break;
case LIST:
serializeListWrite(
(ListColumnVector) colVector,
(ListTypeInfo) typeInfo,
(ListObjectInspector) objectInspector,
adjustedBatchIndex);
break;
case MAP:
serializeMapWrite(
(MapColumnVector) colVector,
(MapTypeInfo) typeInfo,
(MapObjectInspector) objectInspector,
adjustedBatchIndex);
break;
case STRUCT:
serializeStructWrite(
(StructColumnVector) colVector,
(StructTypeInfo) typeInfo,
(StructObjectInspector) objectInspector,
adjustedBatchIndex);
break;
case UNION:
serializeUnionWrite(
(UnionColumnVector) colVector,
(UnionTypeInfo) typeInfo,
(UnionObjectInspector) objectInspector,
adjustedBatchIndex);
break;
default:
throw new RuntimeException("Unexpected category " + category);
}
}
private void serializeUnionWrite(
UnionColumnVector colVector, UnionTypeInfo typeInfo,
UnionObjectInspector objectInspector, int adjustedBatchIndex) throws IOException {
final byte tag = (byte) colVector.tags[adjustedBatchIndex];
final ColumnVector fieldColumnVector = colVector.fields[tag];
final TypeInfo objectTypeInfo = typeInfo.getAllUnionObjectTypeInfos().get(tag);
serializeWrite.beginUnion(tag);
serializeWrite(
fieldColumnVector,
objectTypeInfo,
objectInspector.getObjectInspectors().get(tag),
adjustedBatchIndex);
serializeWrite.finishUnion();
}
private void serializeStructWrite(
StructColumnVector colVector, StructTypeInfo typeInfo,
StructObjectInspector objectInspector, int adjustedBatchIndex) throws IOException {
final ColumnVector[] fieldColumnVectors = colVector.fields;
final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
final List<? extends StructField> structFields = objectInspector.getAllStructFieldRefs();
final int size = fieldTypeInfos.size();
final List list = (List) vectorExtractRow.extractRowColumn(
colVector, typeInfo, objectInspector, adjustedBatchIndex);
serializeWrite.beginStruct(list);
for (int i = 0; i < size; i++) {
if (i > 0) {
serializeWrite.separateStruct();
}
serializeWrite(
fieldColumnVectors[i],
fieldTypeInfos.get(i),
structFields.get(i).getFieldObjectInspector(),
adjustedBatchIndex);
}
serializeWrite.finishStruct();
}
private void serializeMapWrite(
MapColumnVector colVector, MapTypeInfo typeInfo,
MapObjectInspector objectInspector, int adjustedBatchIndex) throws IOException {
final ColumnVector keyColumnVector = colVector.keys;
final ColumnVector valueColumnVector = colVector.values;
final TypeInfo keyTypeInfo = typeInfo.getMapKeyTypeInfo();
final TypeInfo valueTypeInfo = typeInfo.getMapValueTypeInfo();
final int offset = (int) colVector.offsets[adjustedBatchIndex];
final int size = (int) colVector.lengths[adjustedBatchIndex];
final Map map = (Map) vectorExtractRow.extractRowColumn(
colVector, typeInfo, objectInspector, adjustedBatchIndex);
serializeWrite.beginMap(map);
for (int i = 0; i < size; i++) {
if (i > 0) {
serializeWrite.separateKeyValuePair();
}
serializeWrite(keyColumnVector, keyTypeInfo,
objectInspector.getMapKeyObjectInspector(), offset + i);
serializeWrite.separateKey();
serializeWrite(valueColumnVector, valueTypeInfo,
objectInspector.getMapValueObjectInspector(), offset + i);
}
serializeWrite.finishMap();
}
private void serializeListWrite(
ListColumnVector colVector, ListTypeInfo typeInfo,
ListObjectInspector objectInspector, int adjustedBatchIndex) throws IOException {
final ColumnVector childColumnVector = colVector.child;
final TypeInfo elementTypeInfo = typeInfo.getListElementTypeInfo();
final int offset = (int) colVector.offsets[adjustedBatchIndex];
final int size = (int) colVector.lengths[adjustedBatchIndex];
final ObjectInspector elementObjectInspector = objectInspector.getListElementObjectInspector();
final List list = (List) vectorExtractRow.extractRowColumn(
colVector, typeInfo, objectInspector, adjustedBatchIndex);
serializeWrite.beginList(list);
for (int i = 0; i < size; i++) {
if (i > 0) {
serializeWrite.separateList();
}
serializeWrite(
childColumnVector, elementTypeInfo, elementObjectInspector, offset + i);
}
serializeWrite.finishList();
}
private void serializePrimitiveWrite(
ColumnVector colVector, PrimitiveTypeInfo typeInfo, int adjustedBatchIndex) throws IOException {
final PrimitiveCategory primitiveCategory = typeInfo.getPrimitiveCategory();
switch (primitiveCategory) {
case BOOLEAN:
serializeWrite.writeBoolean(((LongColumnVector) colVector).vector[adjustedBatchIndex] != 0);
break;
case BYTE:
serializeWrite.writeByte((byte) ((LongColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case SHORT:
serializeWrite.writeShort((short) ((LongColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case INT:
serializeWrite.writeInt((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case LONG:
serializeWrite.writeLong(((LongColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case DATE:
serializeWrite.writeDate((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case TIMESTAMP:
serializeWrite.writeTimestamp(((TimestampColumnVector) colVector).asScratchTimestamp(adjustedBatchIndex));
break;
case FLOAT:
serializeWrite.writeFloat((float) ((DoubleColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case DOUBLE:
serializeWrite.writeDouble(((DoubleColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case STRING:
case CHAR:
case VARCHAR:
{
// CHAR and VARCHAR are stored without padding, so write them the same way as STRING.
final BytesColumnVector bytesColVector = (BytesColumnVector) colVector;
serializeWrite.writeString(
bytesColVector.vector[adjustedBatchIndex],
bytesColVector.start[adjustedBatchIndex],
bytesColVector.length[adjustedBatchIndex]);
}
break;
case BINARY:
{
final BytesColumnVector bytesColVector = (BytesColumnVector) colVector;
serializeWrite.writeBinary(
bytesColVector.vector[adjustedBatchIndex],
bytesColVector.start[adjustedBatchIndex],
bytesColVector.length[adjustedBatchIndex]);
}
break;
case DECIMAL:
{
final DecimalColumnVector decimalColVector = (DecimalColumnVector) colVector;
serializeWrite.writeHiveDecimal(decimalColVector.vector[adjustedBatchIndex], decimalColVector.scale);
}
break;
case INTERVAL_YEAR_MONTH:
serializeWrite.writeHiveIntervalYearMonth((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case INTERVAL_DAY_TIME:
serializeWrite.writeHiveIntervalDayTime(((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedBatchIndex));
break;
default:
throw new RuntimeException("Unexpected primitive category " + primitiveCategory);
}
}
public boolean getHasAnyNulls() {
return hasAnyNulls;
}
public boolean getIsAllNulls() {
return isAllNulls;
}
}