/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.serde2.columnar;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeSpec;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.Writable;

/**
 * LazyBinaryColumnarSerDe.
 *
 * <p>A serde that mixes the columnar serde with the lazy-binary serde: every column is
 * written into a BytesRefArrayWritable using a compact binary encoding, and columns are
 * deserialized lazily, i.e. only on demand.
 */
@SerDeSpec(schemaProps = {serdeConstants.LIST_COLUMNS, serdeConstants.LIST_COLUMN_TYPES})
public class LazyBinaryColumnarSerDe extends ColumnarSerDeBase {

  /**
   * One-byte marker (binary 10111111) written in place of an empty string. Per the original
   * author's note, this is an invalid single-byte UTF sequence that a valid UTF stream
   * cannot produce.
   */
  static final byte[] INVALID_UTF__SINGLE_BYTE = {(byte)Integer.parseInt("10111111", 2)};

  private List<String> columnNames;
  private List<TypeInfo> columnTypes;

  /**
   * Renders this serde as {@code <class>[<column names>:<column types>]}.
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(getClass().toString());
    text.append("[").append(columnNames);
    text.append(":").append(columnTypes);
    text.append("]");
    return text.toString();
  }

  /**
   * Sets up the serde from the table properties.
   *
   * <p>Column names and types are read through {@link LazySerDeParameters}; from them the
   * columnar struct inspector and the cached lazy struct are built, and finally the base
   * class is initialized with the number of columns.
   *
   * @param conf job configuration; when {@code null} every column is read
   * @param tbl table properties handed to {@link LazySerDeParameters}
   * @throws SerDeException propagated from parameter parsing or base-class initialization
   */
  @Override
  public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    final LazySerDeParameters params =
        new LazySerDeParameters(conf, tbl, getClass().getName());
    columnNames = params.getColumnNames();
    columnTypes = params.getColumnTypes();

    cachedObjectInspector =
        LazyBinaryFactory.createColumnarStructInspector(columnNames, columnTypes);

    final int columnCount = columnTypes.size();
    final List<Integer> idsToRead = columnIdsToRead(conf, columnCount);
    cachedLazyStruct = new LazyBinaryColumnarStruct(cachedObjectInspector, idsToRead);

    super.initialize(columnCount);
  }

  /**
   * Picks the column ids that must not be skipped: all of {@code 0..columnCount-1} when there
   * is no configuration or it asks for all columns, otherwise the ids stored in the
   * configuration.
   */
  private static List<Integer> columnIdsToRead(Configuration conf, int columnCount) {
    if (conf != null && !ColumnProjectionUtils.isReadAllColumns(conf)) {
      return ColumnProjectionUtils.getReadColumnIDs(conf);
    }
    List<Integer> everyColumn = new ArrayList<Integer>(columnCount);
    for (int id = 0; id < columnCount; id++) {
      everyColumn.add(id);
    }
    return everyColumn;
  }

  /**
   * Serializes a struct row, one column at a time, into the shared serialization stream and
   * points each entry of {@code field} at its slice of that stream.
   *
   * @param obj the row object; its fields are obtained through {@code objInspector}
   * @param objInspector inspector for {@code obj}; must be of category STRUCT
   * @return the cached writable ({@code serializeCache}) of the base class
   * @throws SerDeException if {@code objInspector} is not a struct inspector
   */
  @Override
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    if (objInspector.getCategory() != Category.STRUCT) {
      throw new SerDeException(getClass().toString()
          + " can only serialize struct types, but we got: "
          + objInspector.getTypeName());
    }

    final StructObjectInspector structInspector = (StructObjectInspector) objInspector;
    final List<? extends StructField> fieldRefs = structInspector.getAllStructFieldRefs();
    final List<Object> values = structInspector.getStructFieldsDataAsList(obj);
    final LazyBinarySerDe.BooleanRef warnedOnceNullMapKey =
        new LazyBinarySerDe.BooleanRef(false);

    serializeStream.reset();
    serializedSize = 0;

    // Offset in the stream at which the current column's bytes begin.
    int columnStart = 0;
    int index = 0;
    for (StructField ref : fieldRefs) {
      final ObjectInspector fieldInspector = ref.getFieldObjectInspector();
      final Object value = (values == null) ? null : values.get(index);

      if (isEmptyString(value, fieldInspector)) {
        // Empty strings are written as the single marker byte instead of a regular encoding.
        serializeStream.write(INVALID_UTF__SINGLE_BYTE, 0, 1);
      } else {
        LazyBinarySerDe.serialize(
            serializeStream, value, fieldInspector, true, warnedOnceNullMapKey);
      }

      // The column covers everything appended to the stream since columnStart.
      final int columnEnd = serializeStream.getLength();
      field[index].set(serializeStream.getData(), columnStart, columnEnd - columnStart);
      columnStart = columnEnd;
      index++;
    }

    serializedSize = serializeStream.getLength();
    lastOperationSerialize = true;
    lastOperationDeserialize = false;
    return serializeCache;
  }

  /**
   * Tells whether {@code value} is a non-null, zero-length value of a primitive STRING field.
   */
  private static boolean isEmptyString(Object value, ObjectInspector inspector) {
    if (value == null) {
      return false;
    }
    if (!inspector.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
      return false;
    }
    final PrimitiveObjectInspector primitive = (PrimitiveObjectInspector) inspector;
    if (!primitive.getPrimitiveCategory().equals(
        PrimitiveObjectInspector.PrimitiveCategory.STRING)) {
      return false;
    }
    return ((StringObjectInspector) inspector).getPrimitiveJavaObject(value).length() == 0;
  }
}