/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.io.parquet.serde;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

import org.apache.hadoop.hive.ql.io.parquet.serde.primitive.ParquetPrimitiveInspectorFactory;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.ArrayWritable;

/**
 * The ArrayWritableObjectInspector will inspect an ArrayWritable, considering it as a Hive struct.<br />
 * It can also inspect a List if Hive decides to inspect the result of an inspection.
 */
public class ArrayWritableObjectInspector extends SettableStructObjectInspector {

  private final TypeInfo typeInfo;
  private final List<TypeInfo> fieldInfos;
  private final List<String> fieldNames;
  private final List<StructField> fields;
  private final HashMap<String, StructFieldImpl> fieldsByName;

  // Whether this OI is for the column-level schema (as opposed to nested column fields).
  private final boolean isRoot;
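  // A minimal usage sketch (hypothetical, not part of this class): a row type built via
  // TypeInfoUtils can be handed to this inspector, after which fields are fetched from the
  // ArrayWritable rows produced by the Parquet readers. The "rowData" variable is assumed.
  //
  //   StructTypeInfo rowType = (StructTypeInfo) TypeInfoUtils
  //       .getTypeInfoFromTypeString("struct<id:int,name:string>");
  //   ArrayWritableObjectInspector oi = new ArrayWritableObjectInspector(rowType);
  //   StructField idField = oi.getStructFieldRef("id");
  //   Object idValue = oi.getStructFieldData(rowData, idField);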
  public ArrayWritableObjectInspector(final StructTypeInfo rowTypeInfo) {
    this(true, rowTypeInfo, null);
  }

  public ArrayWritableObjectInspector(StructTypeInfo originalTypeInfo, StructTypeInfo prunedTypeInfo) {
    this(true, originalTypeInfo, prunedTypeInfo);
  }

  public ArrayWritableObjectInspector(boolean isRoot,
      StructTypeInfo originalTypeInfo, StructTypeInfo prunedTypeInfo) {
    this.isRoot = isRoot;
    typeInfo = originalTypeInfo;
    fieldNames = originalTypeInfo.getAllStructFieldNames();
    fieldInfos = originalTypeInfo.getAllStructFieldTypeInfos();
    fields = new ArrayList<>(fieldNames.size());
    fieldsByName = new HashMap<>();

    for (int i = 0; i < fieldNames.size(); ++i) {
      final String name = fieldNames.get(i);
      final TypeInfo fieldInfo = fieldInfos.get(i);

      StructFieldImpl field = null;
      if (prunedTypeInfo != null) {
        for (int idx = 0; idx < prunedTypeInfo.getAllStructFieldNames().size(); ++idx) {
          if (prunedTypeInfo.getAllStructFieldNames().get(idx).equalsIgnoreCase(name)) {
            TypeInfo prunedFieldInfo = prunedTypeInfo.getAllStructFieldTypeInfos().get(idx);
            field = new StructFieldImpl(name, getObjectInspector(fieldInfo, prunedFieldInfo), i, idx);
            break;
          }
        }
      }
      if (field == null) {
        field = new StructFieldImpl(name, getObjectInspector(fieldInfo, null), i, i);
      }

      fields.add(field);
      fieldsByName.put(name.toLowerCase(), field);
    }
  }
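  // Illustration of the pruning handled above (type strings are hypothetical): if the original
  // row type is struct<s:struct<a:int,b:boolean>> and only s.b survives nested column pruning,
  // the pruned type is struct<s:struct<b:boolean>>. The nested field 'b' keeps its original
  // index 1 for schema purposes but gets adjustedIndex 0, which is the position the pruned
  // Parquet data actually uses.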
  private ObjectInspector getObjectInspector(TypeInfo typeInfo, TypeInfo prunedTypeInfo) {
    if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
      return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
    } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
      return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
    } else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
      return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
    } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo)) {
      return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
      return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
    } else if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
      return ParquetPrimitiveInspectorFactory.parquetStringInspector;
    } else if (typeInfo instanceof DecimalTypeInfo) {
      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((DecimalTypeInfo) typeInfo);
    } else if (typeInfo.getCategory().equals(Category.STRUCT)) {
      return new ArrayWritableObjectInspector(false, (StructTypeInfo) typeInfo, (StructTypeInfo) prunedTypeInfo);
    } else if (typeInfo.getCategory().equals(Category.LIST)) {
      final TypeInfo subTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
      return new ParquetHiveArrayInspector(getObjectInspector(subTypeInfo, null));
    } else if (typeInfo.getCategory().equals(Category.MAP)) {
      final TypeInfo keyTypeInfo = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo();
      final TypeInfo valueTypeInfo = ((MapTypeInfo) typeInfo).getMapValueTypeInfo();
      if (keyTypeInfo.equals(TypeInfoFactory.stringTypeInfo) || keyTypeInfo.equals(TypeInfoFactory.byteTypeInfo)
          || keyTypeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
        return new DeepParquetHiveMapInspector(getObjectInspector(keyTypeInfo, null),
            getObjectInspector(valueTypeInfo, null));
      } else {
        return new StandardParquetHiveMapInspector(getObjectInspector(keyTypeInfo, null),
            getObjectInspector(valueTypeInfo, null));
      }
    } else if (typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
      return ParquetPrimitiveInspectorFactory.parquetByteInspector;
    } else if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
      return ParquetPrimitiveInspectorFactory.parquetShortInspector;
    } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
      return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
    } else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
    } else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
      return PrimitiveObjectInspectorFactory.writableDateObjectInspector;
    } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.CHAR_TYPE_NAME)) {
      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((CharTypeInfo) typeInfo);
    } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((VarcharTypeInfo) typeInfo);
    } else {
      throw new UnsupportedOperationException("Unknown field type: " + typeInfo);
    }
  }

  @Override
  public Category getCategory() {
    return Category.STRUCT;
  }

  @Override
  public String getTypeName() {
    return typeInfo.getTypeName();
  }

  @Override
  public List<? extends StructField> getAllStructFieldRefs() {
    return fields;
  }

  @Override
  public Object getStructFieldData(final Object data, final StructField fieldRef) {
    if (data == null) {
      return null;
    }

    if (data instanceof ArrayWritable) {
      final ArrayWritable arr = (ArrayWritable) data;
      final StructFieldImpl structField = (StructFieldImpl) fieldRef;
      int index = isRoot ? structField.getIndex() : structField.adjustedIndex;
      if (index < arr.get().length) {
        return arr.get()[index];
      } else {
        return null;
      }
    }

    // Since setStructFieldData and create return a list, getStructFieldData should be able to
    // handle list data. This is required when the table serde is ParquetHiveSerDe and the
    // partition serde is something else.
    if (data instanceof List) {
      return ((List) data).get(((StructFieldImpl) fieldRef).getIndex());
    }

    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
  }

  @Override
  public StructField getStructFieldRef(final String name) {
    return fieldsByName.get(name.toLowerCase());
  }

  @Override
  public List<Object> getStructFieldsDataAsList(final Object data) {
    if (data == null) {
      return null;
    }

    if (data instanceof ArrayWritable) {
      final ArrayWritable arr = (ArrayWritable) data;
      final Object[] arrWritable = arr.get();
      return new ArrayList<>(Arrays.asList(arrWritable));
    }

    // Since setStructFieldData and create return a list, getStructFieldsDataAsList should be
    // able to handle list data. This is required when the table serde is ParquetHiveSerDe and
    // the partition serde is something else.
    if (data instanceof List) {
      return ((List) data);
    }

    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
  }

  @Override
  public Object create() {
    final ArrayList<Object> list = new ArrayList<>(fields.size());
    for (int i = 0; i < fields.size(); ++i) {
      list.add(null);
    }
    return list;
  }

  @Override
  public Object setStructFieldData(Object struct, StructField field, Object fieldValue) {
    final ArrayList<Object> list = (ArrayList<Object>) struct;
    list.set(((StructFieldImpl) field).getIndex(), fieldValue);
    return list;
  }
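  // Sketch of the creation round trip handled above (hypothetical values; "oi" and "idField"
  // are from the earlier sketch): create() returns a List with one null slot per field,
  // setStructFieldData fills a slot, and getStructFieldData can read it back even though the
  // read path normally sees ArrayWritable.
  //
  //   Object row = oi.create();                          // [null, null]
  //   oi.setStructFieldData(row, idField, new IntWritable(7));
  //   Object back = oi.getStructFieldData(row, idField); // IntWritable(7)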
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    ArrayWritableObjectInspector that = (ArrayWritableObjectInspector) o;

    if (isRoot != that.isRoot
        || (typeInfo != null ? !typeInfo.equals(that.typeInfo) : that.typeInfo != null)
        || (fieldInfos != null ? !fieldInfos.equals(that.fieldInfos) : that.fieldInfos != null)
        || (fieldNames != null ? !fieldNames.equals(that.fieldNames) : that.fieldNames != null)
        || (fields != null ? !fields.equals(that.fields) : that.fields != null)) {
      return false;
    }

    return fieldsByName != null ? fieldsByName.equals(that.fieldsByName) : that.fieldsByName == null;
  }

  @Override
  public int hashCode() {
    int result = typeInfo != null ? typeInfo.hashCode() : 0;
    result = 31 * result + (fieldInfos != null ? fieldInfos.hashCode() : 0);
    result = 31 * result + (fieldNames != null ? fieldNames.hashCode() : 0);
    result = 31 * result + (fields != null ? fields.hashCode() : 0);
    result = 31 * result + (fieldsByName != null ? fieldsByName.hashCode() : 0);
    result = 31 * result + (isRoot ? 1 : 0);
    return result;
  }

  private class StructFieldImpl implements StructField {

    private final String name;
    private final ObjectInspector inspector;
    private final int index;
    // This is the adjusted index after nested column pruning.
    // For instance, given the struct type: s:<struct<a:int, b:boolean>>
    // If only 's.b' is used, the pruned type is: s:<struct<b:boolean>>.
    // Here, the index of field 'b' is changed from 1 to 0.
    // When we look up the data from Parquet, the index needs to be adjusted accordingly.
    // Note: currently this is only used in the read path.
    final int adjustedIndex;

    public StructFieldImpl(final String name, final ObjectInspector inspector,
        final int index, int adjustedIndex) {
      this.name = name;
      this.inspector = inspector;
      this.index = index;
      this.adjustedIndex = adjustedIndex;
    }

    @Override
    public String getFieldComment() {
      return "";
    }

    @Override
    public String getFieldName() {
      return name;
    }

    public int getIndex() {
      return index;
    }

    @Override
    public ObjectInspector getFieldObjectInspector() {
      return inspector;
    }

    @Override
    public int getFieldID() {
      return index;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }

      StructFieldImpl that = (StructFieldImpl) o;

      if (index != that.index) {
        return false;
      }
      if (adjustedIndex != that.adjustedIndex) {
        return false;
      }
      if (name != null ? !name.equals(that.name) : that.name != null) {
        return false;
      }
      return inspector != null ? inspector.equals(that.inspector) : that.inspector == null;
    }

    @Override
    public int hashCode() {
      int result = name != null ? name.hashCode() : 0;
      result = 31 * result + (inspector != null ? inspector.hashCode() : 0);
      result = 31 * result + index;
      result = 31 * result + adjustedIndex;
      return result;
    }
  }
}