/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.exec.vector;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Charsets;

/**
 * This class extracts specified VectorizedRowBatch row columns into writables.
 *
 * The caller provides the data types and projection column numbers of a subset of the columns
 * to extract.
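 *
 * Example usage, as a minimal sketch (the typeInfos, batch, and batchIndex variables here are
 * hypothetical placeholders, not members of this class):
 * <pre>
 *   VectorExtractRow vectorExtractRow = new VectorExtractRow();
 *   vectorExtractRow.init(typeInfos);    // no projection: columns 0 .. typeInfos.length - 1
 *   Object[] row = new Object[vectorExtractRow.getCount()];
 *   vectorExtractRow.extractRow(batch, batchIndex, row);
 * </pre>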
 */
public class VectorExtractRow {

  private static final long serialVersionUID = 1L;

  private static final Logger LOG = LoggerFactory.getLogger(VectorExtractRow.class);

  /*
   * These members have information for extracting a row's column objects from VectorizedRowBatch
   * columns.
   */
  int[] projectionColumnNums;
              // Extraction can be a subset of columns, so this is the projection --
              // the batch column numbers.

  TypeInfo[] typeInfos;

  ObjectInspector[] objectInspectors;

  /*
   * Allocate the various arrays.
   */
  private void allocateArrays(int count) {
    projectionColumnNums = new int[count];
    typeInfos = new TypeInfo[count];
    objectInspectors = new ObjectInspector[count];
  }

  /*
   * Initialize one column's array entries.
   */
  private void initEntry(int logicalColumnIndex, int projectionColumnNum, TypeInfo typeInfo) {
    projectionColumnNums[logicalColumnIndex] = projectionColumnNum;
    typeInfos[logicalColumnIndex] = typeInfo;
    objectInspectors[logicalColumnIndex] =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
  }

  /*
   * Initialize using a StructObjectInspector and a column projection list.
   */
  public void init(StructObjectInspector structObjectInspector, List<Integer> projectedColumns)
      throws HiveException {

    List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
    final int count = fields.size();
    allocateArrays(count);

    for (int i = 0; i < count; i++) {

      int projectionColumnNum = projectedColumns.get(i);

      StructField field = fields.get(i);
      ObjectInspector fieldInspector = field.getFieldObjectInspector();
      TypeInfo typeInfo =
          TypeInfoUtils.getTypeInfoFromTypeString(fieldInspector.getTypeName());

      initEntry(i, projectionColumnNum, typeInfo);
    }
  }

  /*
   * Initialize using a TypeInfo array and a column projection array.
   */
  public void init(TypeInfo[] typeInfos, int[] projectedColumns)
      throws HiveException {

    final int count = typeInfos.length;
    allocateArrays(count);

    for (int i = 0; i < count; i++) {
      initEntry(i, projectedColumns[i], typeInfos[i]);
    }
  }

  /*
   * Initialize using data type names.
   * No projection -- the column range 0 .. types.size() - 1.
   */
  public void init(List<String> typeNames) throws HiveException {

    final int count = typeNames.size();
    allocateArrays(count);

    for (int i = 0; i < count; i++) {
      initEntry(i, i, TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i)));
    }
  }

  public void init(TypeInfo[] typeInfos) throws HiveException {

    final int count = typeInfos.length;
    allocateArrays(count);

    for (int i = 0; i < count; i++) {
      initEntry(i, i, typeInfos[i]);
    }
  }

  public int getCount() {
    return projectionColumnNums.length;
  }

  /**
   * Extract a row's column object from the ColumnVector at batchIndex in the VectorizedRowBatch.
   *
   * @param batch the batch to extract from
   * @param batchIndex the index of the row within the batch
   * @param logicalColumnIndex the index into the projection
   * @return the extracted column object, or null
   */
  private Object extractRowColumn(VectorizedRowBatch batch, int batchIndex,
      int logicalColumnIndex) {

    final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
    final ColumnVector colVector = batch.cols[projectionColumnNum];
    return extractRowColumn(
        colVector, typeInfos[logicalColumnIndex], objectInspectors[logicalColumnIndex],
        batchIndex);
  }

  Object extractRowColumn(
      ColumnVector colVector, TypeInfo typeInfo, ObjectInspector objectInspector, int batchIndex) {

    if (colVector == null) {
      // The planner will not include unneeded columns for reading but other parts of execution
      // may ask for them.
      return null;
    }
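    // A repeating vector stores a single value in element 0 that logically applies to every row,
    // so read element 0 instead of batchIndex in that case.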
    final int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);

    if (!colVector.noNulls && colVector.isNull[adjustedIndex]) {
      return null;
    }

    final Category category = typeInfo.getCategory();
    switch (category) {
    case PRIMITIVE:
      {
        final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
        final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
        final Writable primitiveWritable =
            VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory);
        switch (primitiveCategory) {
        case VOID:
          return null;
        case BOOLEAN:
          ((BooleanWritable) primitiveWritable).set(
              ((LongColumnVector) colVector).vector[adjustedIndex] != 0);
          return primitiveWritable;
        case BYTE:
          ((ByteWritable) primitiveWritable).set(
              (byte) ((LongColumnVector) colVector).vector[adjustedIndex]);
          return primitiveWritable;
        case SHORT:
          ((ShortWritable) primitiveWritable).set(
              (short) ((LongColumnVector) colVector).vector[adjustedIndex]);
          return primitiveWritable;
        case INT:
          ((IntWritable) primitiveWritable).set(
              (int) ((LongColumnVector) colVector).vector[adjustedIndex]);
          return primitiveWritable;
        case LONG:
          ((LongWritable) primitiveWritable).set(
              ((LongColumnVector) colVector).vector[adjustedIndex]);
          return primitiveWritable;
        case TIMESTAMP:
          ((TimestampWritable) primitiveWritable).set(
              ((TimestampColumnVector) colVector).asScratchTimestamp(adjustedIndex));
          return primitiveWritable;
        case DATE:
          ((DateWritable) primitiveWritable).set(
              (int) ((LongColumnVector) colVector).vector[adjustedIndex]);
          return primitiveWritable;
        case FLOAT:
          ((FloatWritable) primitiveWritable).set(
              (float) ((DoubleColumnVector) colVector).vector[adjustedIndex]);
          return primitiveWritable;
        case DOUBLE:
          ((DoubleWritable) primitiveWritable).set(
              ((DoubleColumnVector) colVector).vector[adjustedIndex]);
          return primitiveWritable;
        case BINARY:
          {
            final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
            final byte[] bytes = bytesColVector.vector[adjustedIndex];
            final int start = bytesColVector.start[adjustedIndex];
            final int length = bytesColVector.length[adjustedIndex];

            if (bytes == null) {
              LOG.info("null binary entry: batchIndex " + batchIndex);
            }

            BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
            bytesWritable.set(bytes, start, length);
            return primitiveWritable;
          }
        case STRING:
          {
            final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
            final byte[] bytes = bytesColVector.vector[adjustedIndex];
            final int start = bytesColVector.start[adjustedIndex];
            final int length = bytesColVector.length[adjustedIndex];

            if (bytes == null) {
              nullBytesReadError(primitiveCategory, batchIndex);
            }

            // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
            ((Text) primitiveWritable).set(bytes, start, length);
            return primitiveWritable;
          }
        case VARCHAR:
          {
            final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
            final byte[] bytes = bytesColVector.vector[adjustedIndex];
            final int start = bytesColVector.start[adjustedIndex];
            final int length = bytesColVector.length[adjustedIndex];

            if (bytes == null) {
              nullBytesReadError(primitiveCategory, batchIndex);
            }

            final int adjustedLength = StringExpr.truncate(bytes, start, length,
                ((VarcharTypeInfo) primitiveTypeInfo).getLength());

            final HiveVarcharWritable hiveVarcharWritable =
                (HiveVarcharWritable) primitiveWritable;
            hiveVarcharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
            return primitiveWritable;
          }
        case CHAR:
          {
            final BytesColumnVector bytesColVector = ((BytesColumnVector) colVector);
            final byte[] bytes = bytesColVector.vector[adjustedIndex];
            final int start = bytesColVector.start[adjustedIndex];
            final int length = bytesColVector.length[adjustedIndex];

            if (bytes == null) {
              nullBytesReadError(primitiveCategory, batchIndex);
            }

            final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length,
                ((CharTypeInfo) primitiveTypeInfo).getLength());

            final HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable;
            hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8),
                ((CharTypeInfo) primitiveTypeInfo).getLength());
            return primitiveWritable;
          }
        case DECIMAL:
          // The HiveDecimalWritable set method will quickly copy the deserialized decimal
          // writable fields.
          ((HiveDecimalWritable) primitiveWritable).set(
              ((DecimalColumnVector) colVector).vector[adjustedIndex]);
          return primitiveWritable;
        case INTERVAL_YEAR_MONTH:
          ((HiveIntervalYearMonthWritable) primitiveWritable).set(
              (int) ((LongColumnVector) colVector).vector[adjustedIndex]);
          return primitiveWritable;
        case INTERVAL_DAY_TIME:
          ((HiveIntervalDayTimeWritable) primitiveWritable).set(
              ((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedIndex));
          return primitiveWritable;
        default:
          throw new RuntimeException("Primitive category " + primitiveCategory.name() +
              " not supported");
        }
      }
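    // Complex types are extracted by recursing on extractRowColumn for each child element,
    // building standard Java objects (List, Map, struct, union) from the child vectors.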
    case LIST:
      {
        final ListColumnVector listColumnVector = (ListColumnVector) colVector;
        final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
        final ListObjectInspector listObjectInspector = (ListObjectInspector) objectInspector;
        final int offset = (int) listColumnVector.offsets[adjustedIndex];
        final int size = (int) listColumnVector.lengths[adjustedIndex];

        final List<Object> list = new ArrayList<Object>();
        for (int i = 0; i < size; i++) {
          list.add(
              extractRowColumn(
                  listColumnVector.child,
                  listTypeInfo.getListElementTypeInfo(),
                  listObjectInspector.getListElementObjectInspector(),
                  offset + i));
        }
        return list;
      }
    case MAP:
      {
        final MapColumnVector mapColumnVector = (MapColumnVector) colVector;
        final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
        final MapObjectInspector mapObjectInspector = (MapObjectInspector) objectInspector;
        final int offset = (int) mapColumnVector.offsets[adjustedIndex];
        final int size = (int) mapColumnVector.lengths[adjustedIndex];

        final Map<Object, Object> map = new HashMap<Object, Object>();
        for (int i = 0; i < size; i++) {
          final Object key = extractRowColumn(
              mapColumnVector.keys,
              mapTypeInfo.getMapKeyTypeInfo(),
              mapObjectInspector.getMapKeyObjectInspector(),
              offset + i);
          final Object value = extractRowColumn(
              mapColumnVector.values,
              mapTypeInfo.getMapValueTypeInfo(),
              mapObjectInspector.getMapValueObjectInspector(),
              offset + i);
          map.put(key, value);
        }
        return map;
      }
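    // For structs, each field has its own child vector; extract every field at the same
    // adjusted index and assemble them with the standard struct object inspector.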
    case STRUCT:
      {
        final StructColumnVector structColumnVector = (StructColumnVector) colVector;
        final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        final StandardStructObjectInspector structInspector =
            (StandardStructObjectInspector) objectInspector;
        final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
        final int size = fieldTypeInfos.size();
        final List<? extends StructField> structFields = structInspector.getAllStructFieldRefs();

        final Object struct = structInspector.create();
        for (int i = 0; i < size; i++) {
          final StructField structField = structFields.get(i);
          final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i);
          final Object value = extractRowColumn(
              structColumnVector.fields[i], fieldTypeInfo, structField.getFieldObjectInspector(),
              adjustedIndex);
          structInspector.setStructFieldData(struct, structField, value);
        }
        return struct;
      }
    case UNION:
      {
        final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
        final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
        final UnionObjectInspector unionInspector = (UnionObjectInspector) objectInspector;
        final List<ObjectInspector> unionInspectors = unionInspector.getObjectInspectors();
        final UnionColumnVector unionColumnVector = (UnionColumnVector) colVector;
        final byte tag = (byte) unionColumnVector.tags[adjustedIndex];
        final Object object = extractRowColumn(
            unionColumnVector.fields[tag], objectTypeInfos.get(tag), unionInspectors.get(tag),
            adjustedIndex);

        final StandardUnion standardUnion = new StandardUnion();
        standardUnion.setTag(tag);
        standardUnion.setObject(object);
        return standardUnion;
      }
    default:
      throw new RuntimeException("Category " + category.name() + " not supported");
    }
  }

  /**
   * Extract a row object from a VectorizedRowBatch at batchIndex.
   *
   * @param batch the batch to extract from
   * @param batchIndex the index of the row within the batch
   * @param objects the output array; receives one object per projected column
   */
  public void extractRow(VectorizedRowBatch batch, int batchIndex, Object[] objects) {
    for (int i = 0; i < projectionColumnNums.length; i++) {
      objects[i] = extractRowColumn(batch, batchIndex, i);
    }
  }

  private void nullBytesReadError(PrimitiveCategory primitiveCategory, int batchIndex) {
    throw new RuntimeException("null " + primitiveCategory.name() +
        " entry: batchIndex " + batchIndex);
  }
}