/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.serde2.fast;

import java.io.IOException;

import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;

/*
 * Directly deserialize a serialization format, with the caller reading field-by-field.
 *
 * The caller is responsible for calling the read method for the right type of each field
 * (after calling readNextField).
 *
 * Reading some fields requires a results object to receive the value information.  A separate
 * results object is created per field at initialization, even for fields of the same type.
 *
 * Some type values are returned by reference, either to bytes in the deserialization buffer or
 * to other type-specific buffers.  Those references are only valid until the next time set is
 * called.
 */
public abstract class DeserializeRead {
  protected TypeInfo[] typeInfos;

  protected boolean useExternalBuffer;

  protected Category[] categories;
  protected PrimitiveCategory[] primitiveCategories;
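
  /*
   * Illustrative usage sketch (not part of the original documentation): how a caller might drive
   * field-by-field reading of one serialized row.  The concrete subclass choice
   * (LazySimpleDeserializeRead), the rowBytes input, and the assumption that all columns are
   * primitive are hypothetical and only for the example.
   *
   *   DeserializeRead deserializeRead = ... a concrete subclass such as LazySimpleDeserializeRead;
   *   TypeInfo[] columnTypeInfos = deserializeRead.typeInfos();
   *   deserializeRead.set(rowBytes, 0, rowBytes.length);
   *   for (int i = 0; i < columnTypeInfos.length; i++) {
   *     if (!deserializeRead.readNextField()) {
   *       // The field is null; reading is already positioned at the next field.
   *       continue;
   *     }
   *     switch (((PrimitiveTypeInfo) columnTypeInfos[i]).getPrimitiveCategory()) {
   *     case LONG:
   *       long longValue = deserializeRead.currentLong;
   *       break;
   *     case DOUBLE:
   *       double doubleValue = deserializeRead.currentDouble;
   *       break;
   *     case DECIMAL:
   *       HiveDecimalWritable decimalWritable = deserializeRead.currentHiveDecimalWritable;
   *       break;
   *     default:
   *       // Read the current* member that matches the field's type.
   *       break;
   *     }
   *   }
   *
   * Values returned by reference (for example currentBytes) are only valid until the next call
   * to set.
   */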

  /*
   * Allocate the current* writable objects used to read one field at a time.  Simple fields like
   * long, double, and int are read into primitive current* members; non-simple field types like
   * Date, Timestamp, etc, are read into a current writable object that this method allocates.
   *
   * Complex type fields are handled by calling this method recursively.
   */
  private void allocateCurrentWritable(TypeInfo typeInfo) {
    switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
      case DATE:
        if (currentDateWritable == null) {
          currentDateWritable = new DateWritable();
        }
        break;
      case TIMESTAMP:
        if (currentTimestampWritable == null) {
          currentTimestampWritable = new TimestampWritable();
        }
        break;
      case INTERVAL_YEAR_MONTH:
        if (currentHiveIntervalYearMonthWritable == null) {
          currentHiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable();
        }
        break;
      case INTERVAL_DAY_TIME:
        if (currentHiveIntervalDayTimeWritable == null) {
          currentHiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable();
        }
        break;
      case DECIMAL:
        if (currentHiveDecimalWritable == null) {
          currentHiveDecimalWritable = new HiveDecimalWritable();
        }
        break;
      default:
        // No writable needed for this data type.
      }
      break;
    case LIST:
      allocateCurrentWritable(((ListTypeInfo) typeInfo).getListElementTypeInfo());
      break;
    case MAP:
      allocateCurrentWritable(((MapTypeInfo) typeInfo).getMapKeyTypeInfo());
      allocateCurrentWritable(((MapTypeInfo) typeInfo).getMapValueTypeInfo());
      break;
    case STRUCT:
      for (TypeInfo fieldTypeInfo : ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos()) {
        allocateCurrentWritable(fieldTypeInfo);
      }
      break;
    case UNION:
      for (TypeInfo fieldTypeInfo : ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos()) {
        allocateCurrentWritable(fieldTypeInfo);
      }
      break;
    default:
      throw new RuntimeException("Unexpected category " + typeInfo.getCategory());
    }
  }

  /**
   * Constructor.
   *
   * When useExternalBuffer is specified as true and readNextField reads a
   * string/char/varchar/binary field that needs format conversion, it will request an external
   * buffer to receive the converted data:
   *
   *   if (deserializeRead.readNextField()) {
   *     if (deserializeRead.currentExternalBufferNeeded) {
   *       <Ensure the external buffer is at least
   *        deserializeRead.currentExternalBufferNeededLen bytes>
   *       deserializeRead.copyToExternalBuffer(externalBuffer, externalBufferStart);
   *     } else {
   *       <Otherwise, the field data is available in the currentBytes, currentBytesStart, and
   *        currentBytesLength members of deserializeRead>
   *     }
   *   }
   *
   * @param typeInfos
   * @param useExternalBuffer Specify true when the caller is prepared to provide a bytes buffer
   *                          to receive a string/char/varchar/binary field that needs format
   *                          conversion.
   */
  public DeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer) {
    this.typeInfos = typeInfos;
    final int count = typeInfos.length;
    categories = new Category[count];
    primitiveCategories = new PrimitiveCategory[count];
    for (int i = 0; i < count; i++) {
      TypeInfo typeInfo = typeInfos[i];
      Category category = typeInfo.getCategory();
      categories[i] = category;
      if (category == Category.PRIMITIVE) {
        PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
        PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
        primitiveCategories[i] = primitiveCategory;
      }
      allocateCurrentWritable(typeInfo);
    }
    this.useExternalBuffer = useExternalBuffer;
  }

  // Don't allow for public.
  protected DeserializeRead() {
  }

  /*
   * The type information for all fields.
   */
  public TypeInfo[] typeInfos() {
    return typeInfos;
  }

  /*
   * Set the range of bytes to be deserialized.
   */
  public abstract void set(byte[] bytes, int offset, int length);
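
  /*
   * Illustrative sketch (an expansion of the constructor example above, not part of the original
   * documentation): handling a string/char/varchar/binary field when useExternalBuffer is true.
   * The externalBuffer variable and its sizing policy are hypothetical caller-side code.
   *
   *   if (deserializeRead.readNextField()) {
   *     byte[] fieldBytes;
   *     int fieldStart;
   *     int fieldLength;
   *     if (deserializeRead.currentExternalBufferNeeded) {
   *       // Format conversion is needed; copy the converted bytes into a caller-owned buffer.
   *       fieldLength = deserializeRead.currentExternalBufferNeededLen;
   *       if (externalBuffer == null || externalBuffer.length < fieldLength) {
   *         externalBuffer = new byte[fieldLength];
   *       }
   *       deserializeRead.copyToExternalBuffer(externalBuffer, 0);
   *       fieldBytes = externalBuffer;
   *       fieldStart = 0;
   *     } else {
   *       // No conversion is needed; the field is referenced directly in the deserialization
   *       // buffer and stays valid only until the next call to set.
   *       fieldBytes = deserializeRead.currentBytes;
   *       fieldStart = deserializeRead.currentBytesStart;
   *       fieldLength = deserializeRead.currentBytesLength;
   *     }
   *   }
   */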

  /*
   * Reads the next field.
   *
   * Afterwards, reading is positioned to the next field.
   *
   * @return Return true when the field was not null and data is put in the appropriate
   *         current* member.
   *         Otherwise, false when the field is null.
   */
  public abstract boolean readNextField() throws IOException;

  /*
   * Reads through an undesired field.
   *
   * No data values are valid after this call.
   * Designed for skipping columns that are not included.
   */
  public abstract void skipNextField() throws IOException;

  /*
   * Returns true if the readField method is supported.
   */
  public boolean isReadFieldSupported() {
    return false;
  }

  /*
   * When supported, read a field by field number (i.e. random access).
   *
   * Currently, only LazySimpleDeserializeRead supports this.
   *
   * @return Return true when the field was not null and data is put in the appropriate
   *         current* member.
   *         Otherwise, false when the field is null.
   */
  public boolean readField(int fieldIndex) throws IOException {
    throw new RuntimeException("Not supported");
  }

  /*
   * Tests whether there is another List element or another Map key/value pair.
   */
  public abstract boolean isNextComplexMultiValue() throws IOException;

  /*
   * Read a field that is under a complex type.  It may be a primitive type or a deeper complex
   * type.
   */
  public abstract boolean readComplexField() throws IOException;

  /*
   * Used by Struct and Union complex type readers to indicate the (final) field has been fully
   * read and the current complex type is finished.
   */
  public abstract void finishComplexVariableFieldsType();

  /*
   * This method may be called after all fields have been read to check for unread data.
   *
   * Note that when reading is optimized to stop at the last needed column, worrying about
   * whether all data was consumed is not appropriate (often we aren't reading it all by design).
   *
   * Since LazySimpleDeserializeRead parses the line through the last desired column, it does
   * support this method.
   */
  public abstract boolean isEndOfInputReached();

  /*
   * Get detailed read position information to help diagnose exceptions.
   */
  public abstract String getDetailedReadPositionString();

  /*
   * These members hold the current value that was read when readNextField returns true.
   */

  /*
   * BOOLEAN.
   */
  public boolean currentBoolean;

  /*
   * BYTE.
   */
  public byte currentByte;

  /*
   * SHORT.
   */
  public short currentShort;

  /*
   * INT.
   */
  public int currentInt;

  /*
   * LONG.
   */
  public long currentLong;

  /*
   * FLOAT.
   */
  public float currentFloat;

  /*
   * DOUBLE.
   */
  public double currentDouble;

  /*
   * STRING, CHAR, VARCHAR, and BINARY.
   *
   * For CHAR and VARCHAR, the caller takes responsibility for truncation/padding issues.
   *
   * When currentExternalBufferNeeded is true, conversion is needed into an external buffer of
   * at least currentExternalBufferNeededLen bytes.  Use copyToExternalBuffer to get the result.
   *
   * Otherwise, currentBytes, currentBytesStart, and currentBytesLength are the result.
   */
  public boolean currentExternalBufferNeeded;
  public int currentExternalBufferNeededLen;

  public void copyToExternalBuffer(byte[] externalBuffer, int externalBufferStart)
      throws IOException {
    throw new RuntimeException("Not implemented");
  }

  public byte[] currentBytes;
  public int currentBytesStart;
  public int currentBytesLength;

  /*
   * DATE.
   */
  public DateWritable currentDateWritable;

  /*
   * TIMESTAMP.
   */
  public TimestampWritable currentTimestampWritable;

  /*
   * INTERVAL_YEAR_MONTH.
   */
  public HiveIntervalYearMonthWritable currentHiveIntervalYearMonthWritable;

  /*
   * INTERVAL_DAY_TIME.
   */
  public HiveIntervalDayTimeWritable currentHiveIntervalDayTimeWritable;

  /*
   * DECIMAL.
   */
  public HiveDecimalWritable currentHiveDecimalWritable;
}
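
/*
 * Illustrative sketch (not part of the original documentation): skipping columns that are not
 * projected by using skipNextField, as described above.  The includedColumns mask and
 * columnCount are hypothetical caller-side variables.
 *
 *   deserializeRead.set(rowBytes, 0, rowBytes.length);
 *   for (int i = 0; i < columnCount; i++) {
 *     if (!includedColumns[i]) {
 *       deserializeRead.skipNextField();
 *     } else if (deserializeRead.readNextField()) {
 *       // Consume the appropriate current* member for column i.
 *     }
 *   }
 *
 * When a subclass reports isReadFieldSupported() as true (currently only
 * LazySimpleDeserializeRead), readField(fieldIndex) can be used instead for random access to
 * the desired columns.
 */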