/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.hive.serde;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.gson.Gson;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveBaseCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.lazy.LazyArray;
import org.apache.hadoop.hive.serde2.lazy.LazyMap;
import org.apache.hadoop.hive.serde2.lazy.LazyNonPrimitive;
import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Used to serialize objects in a SerDe. Objects can come from native Hive tables or they can come from the
 * {@link ObjectDeserializer}.
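 * <p>
 * A minimal usage sketch ({@code row} and {@code inspector} are hypothetical stand-ins for the values
 * Hive hands to a SerDe's serialize method):
 * <pre>{@code
 * ObjectSerializer serializer = new ObjectSerializer(Lists.newArrayList("id", "name"));
 * // inspector must be a StructObjectInspector describing the row
 * Writable json = serializer.serialize(row, inspector);  // JSON Text, e.g. {"id":1,"name":"a"}
 * }</pre>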
 */
public class ObjectSerializer {
  private static final Gson GSON = new Gson();
  private final ArrayList<String> columnNames;

  public ObjectSerializer(ArrayList<String> columnNames) {
    this.columnNames = columnNames;
  }

  /**
   * Serializes a record into a JSON {@link Text} object, keyed by the column names given at construction.
   */
  public Writable serialize(Object o, ObjectInspector objectInspector) {
    // overwrite field names (as they get lost by Hive)
    StructTypeInfo structTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(objectInspector);
    structTypeInfo.setAllStructFieldNames(columnNames);

    List<TypeInfo> info = structTypeInfo.getAllStructFieldTypeInfos();
    List<String> names = structTypeInfo.getAllStructFieldNames();

    Map<String, Object> recordMap = new HashMap<>();
    List<Object> recordObjects = ((StructObjectInspector) objectInspector).getStructFieldsDataAsList(o);

    for (int structIndex = 0; structIndex < info.size(); structIndex++) {
      Object obj = recordObjects.get(structIndex);
      TypeInfo objType = info.get(structIndex);
      if (obj instanceof LazyNonPrimitive || obj instanceof LazyPrimitive) {
        // In case the SerDe that deserialized the object is the one of a native table
        recordMap.put(names.get(structIndex), fromLazyObject(objType, obj));
      } else if (obj instanceof Writable) {
        // Native tables sometimes introduce primitive Writable objects at this point
        recordMap.put(names.get(structIndex), fromWritable((Writable) obj));
      } else {
        // In case the deserializer is the DatasetSerDe
        recordMap.put(names.get(structIndex), serialize(obj, objType));
      }
    }

    // TODO Improve serialization logic - CDAP-11
    return new Text(GSON.toJson(recordMap));
  }

  @SuppressWarnings("unchecked")
  private Object serialize(Object obj, TypeInfo typeInfo) {
    switch (typeInfo.getCategory()) {
      case PRIMITIVE:
        return obj;
      case LIST:
        return serializeList((List<Object>) obj, (ListTypeInfo) typeInfo);
      case MAP:
        return serializeMap((Map<Object, Object>) obj, (MapTypeInfo) typeInfo);
      case STRUCT:
        return serializeStruct((List<Object>) obj, (StructTypeInfo) typeInfo);
      case UNION:
        throw new UnsupportedOperationException("union not yet supported");
    }
    throw new IllegalArgumentException("Unknown category " + typeInfo.getCategory());
  }

  private Object serializeList(List<Object> list, ListTypeInfo typeInfo) {
    // need to recurse since the list may contain structs
    TypeInfo elementType = typeInfo.getListElementTypeInfo();
    List<Object> serialized = Lists.newArrayListWithCapacity(list.size());
    for (Object element : list) {
      serialized.add(serialize(element, elementType));
    }
    return serialized;
  }

  private Object serializeMap(Map<Object, Object> map, MapTypeInfo typeInfo) {
    // need to recurse since keys and values may contain structs
    Map<Object, Object> serialized = Maps.newHashMapWithExpectedSize(map.size());
    TypeInfo keyType = typeInfo.getMapKeyTypeInfo();
    TypeInfo valType = typeInfo.getMapValueTypeInfo();
    for (Map.Entry<Object, Object> mapEntry : map.entrySet()) {
      serialized.put(serialize(mapEntry.getKey(), keyType), serialize(mapEntry.getValue(), valType));
    }
    return serialized;
  }

  // a struct is represented as a list of objects, in field order
  private Object serializeStruct(List<Object> struct, StructTypeInfo typeInfo) {
    Map<String, Object> serialized = Maps.newHashMapWithExpectedSize(struct.size());
    List<TypeInfo> types = typeInfo.getAllStructFieldTypeInfos();
    List<String> names = typeInfo.getAllStructFieldNames();
    for (int i = 0; i < struct.size(); i++) {
      serialized.put(names.get(i), serialize(struct.get(i), types.get(i)));
    }
    return serialized;
  }
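
  // Unwraps a Hadoop/Hive Writable into the plain Java value Gson should serialize.
  // Illustrative mappings: new IntWritable(5) -> 5, new Text("abc") -> "abc",
  // NullWritable.get() -> null; any unrecognized Writable falls back to toString().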
  private Object fromWritable(Writable writable) {
    if (writable instanceof IntWritable) {
      return ((IntWritable) writable).get();
    } else if (writable instanceof LongWritable) {
      return ((LongWritable) writable).get();
    } else if (writable instanceof ShortWritable) {
      return ((ShortWritable) writable).get();
    } else if (writable instanceof BooleanWritable) {
      return ((BooleanWritable) writable).get();
    } else if (writable instanceof DoubleWritable) {
      return ((DoubleWritable) writable).get();
    } else if (writable instanceof FloatWritable) {
      return ((FloatWritable) writable).get();
    } else if (writable instanceof Text) {
      return writable.toString();
    } else if (writable instanceof BytesWritable) {
      return ((BytesWritable) writable).getBytes();
    } else if (writable instanceof ByteWritable) {
      return ((ByteWritable) writable).get();
    } else if (writable instanceof DateWritable) {
      return ((DateWritable) writable).get();
    } else if (writable instanceof org.apache.hadoop.hive.serde2.io.ShortWritable) {
      return ((org.apache.hadoop.hive.serde2.io.ShortWritable) writable).get();
    } else if (writable instanceof HiveBaseCharWritable) {
      return ((HiveBaseCharWritable) writable).getTextValue().toString();
    } else if (writable instanceof TimestampWritable) {
      return ((TimestampWritable) writable).getTimestamp();
    } else if (writable instanceof org.apache.hadoop.hive.serde2.io.DoubleWritable) {
      return ((org.apache.hadoop.hive.serde2.io.DoubleWritable) writable).get();
    } else if (writable instanceof HiveDecimalWritable) {
      return ((HiveDecimalWritable) writable).getHiveDecimal();
    } else if (writable instanceof NullWritable) {
      return null;
    }
    return writable.toString();
  }

  private Object fromLazyObject(TypeInfo type, Object data) {
    if (data == null) {
      return null;
    }

    switch (type.getCategory()) {
      case PRIMITIVE:
        Writable writable = ((LazyPrimitive) data).getWritableObject();
        return fromWritable(writable);
      case LIST:
        ListTypeInfo listType = (ListTypeInfo) type;
        TypeInfo listElementType = listType.getListElementTypeInfo();
        List<Object> list = ((LazyArray) data).getList();
        if (list.isEmpty()) {
          return ImmutableList.of();
        }
        Object[] arrayContent = new Object[list.size()];
        for (int i = 0; i < arrayContent.length; i++) {
          arrayContent[i] = fromLazyObject(listElementType, list.get(i));
        }
        return arrayContent;
      case MAP:
        MapTypeInfo mapType = (MapTypeInfo) type;
        // use a plain HashMap rather than a concurrent map: ConcurrentHashMap rejects the
        // null keys and values that fromLazyObject can legitimately return for null fields
        Map<Object, Object> mapContent = Maps.newHashMap();
        Map<Object, Object> map = ((LazyMap) data).getMap();
        for (Map.Entry<Object, Object> entry : map.entrySet()) {
          mapContent.put(fromLazyObject(mapType.getMapKeyTypeInfo(), entry.getKey()),
                         fromLazyObject(mapType.getMapValueTypeInfo(), entry.getValue()));
        }
        return mapContent;
      case STRUCT:
        StructTypeInfo structType = (StructTypeInfo) type;
        List<TypeInfo> info = structType.getAllStructFieldTypeInfos();
        List<String> names = structType.getAllStructFieldNames();

        // same reasoning as the MAP case: struct fields may be null
        Map<String, Object> structMap = Maps.newHashMap();
        List<Object> struct = ((LazyStruct) data).getFieldsAsList();

        for (int structIndex = 0; structIndex < info.size(); structIndex++) {
          structMap.put(names.get(structIndex),
                        fromLazyObject(info.get(structIndex), struct.get(structIndex)));
        }
        return structMap;
      case UNION:
        throw new UnsupportedOperationException("union not yet supported");
      default:
        return data.toString();
    }
  }
}