/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io.parquet.convert;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.*;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;

import org.apache.parquet.io.api.Converter;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.Type;
/**
 * A {@link HiveGroupConverter} for Parquet group (struct) types: converts
 * between Hive and Parquet representations, recursing into nested complex
 * types. Each completed record is exposed to the parent as an
 * {@link ArrayWritable}.
 */
public class HiveStructConverter extends HiveGroupConverter {

  /** Field count of the containing (file/table) schema; sizes the output array. */
  private final int totalFieldCount;
  /** One child converter per selected (requested) field. */
  private Converter[] converters;
  /** Parent to notify when this struct completes; null for the root converter. */
  private final ConverterParent parent;
  /** Position of this struct inside its parent's writable array. */
  private final int index;
  private Writable[] writables;
  /** Repeated-field children that need per-record start/end notifications. */
  private List<Repeated> repeatedConverters;
  /** The root converter reuses one writable array across records; children re-allocate. */
  private boolean reuseWritableArray = false;
  /** Hive struct field names/types; set only when the Hive type is a STRUCT. */
  private List<String> hiveFieldNames;
  private List<TypeInfo> hiveFieldTypeInfos;

  /**
   * Root-level constructor: owns a writable array sized to the full table
   * schema and reuses it across records.
   *
   * @param requestedSchema the projected (selected) Parquet schema
   * @param tableSchema the complete Parquet schema of the file/table
   * @param metadata read-support metadata (e.g. the column-index-access flag)
   * @param hiveTypeInfo Hive type of the row; may be null
   */
  public HiveStructConverter(final GroupType requestedSchema, final GroupType tableSchema,
      Map<String, String> metadata, TypeInfo hiveTypeInfo) {
    setMetadata(metadata);
    this.reuseWritableArray = true;
    this.writables = new Writable[tableSchema.getFieldCount()];
    this.parent = null;
    this.index = 0;
    this.totalFieldCount = tableSchema.getFieldCount();
    init(requestedSchema, null, 0, tableSchema, hiveTypeInfo);
  }

  /** Nested-struct constructor where the selected and containing schemas coincide. */
  public HiveStructConverter(final GroupType groupType, final ConverterParent parent,
      final int index, TypeInfo hiveTypeInfo) {
    this(groupType, parent, index, groupType, hiveTypeInfo);
  }

  /**
   * Nested-struct constructor with distinct selected and containing schemas
   * (column projection pushdown).
   */
  public HiveStructConverter(final GroupType selectedGroupType,
      final ConverterParent parent, final int index, final GroupType containingGroupType,
      TypeInfo hiveTypeInfo) {
    this.parent = parent;
    this.index = index;
    this.totalFieldCount = containingGroupType.getFieldCount();
    init(selectedGroupType, parent, index, containingGroupType, hiveTypeInfo);
  }

  /**
   * Builds the per-field child converters for every selected field, validating
   * that each selected field exists (recursively) in the containing schema.
   *
   * @throws IllegalStateException if a selected field is not part of the
   *         containing group type
   */
  private void init(final GroupType selectedGroupType,
      final ConverterParent parent, final int index, final GroupType containingGroupType,
      TypeInfo hiveTypeInfo) {
    if (parent != null) {
      setMetadata(parent.getMetadata());
    }
    final int selectedFieldCount = selectedGroupType.getFieldCount();
    converters = new Converter[selectedFieldCount];
    this.repeatedConverters = new ArrayList<>();

    // Category is an enum; identity comparison is the idiomatic check.
    if (hiveTypeInfo != null && hiveTypeInfo.getCategory() == ObjectInspector.Category.STRUCT) {
      this.hiveFieldNames = ((StructTypeInfo) hiveTypeInfo).getAllStructFieldNames();
      this.hiveFieldTypeInfos = ((StructTypeInfo) hiveTypeInfo).getAllStructFieldTypeInfos();
    }

    List<Type> selectedFields = selectedGroupType.getFields();
    for (int i = 0; i < selectedFieldCount; i++) {
      Type subtype = selectedFields.get(i);
      if (isSubType(containingGroupType, subtype)) {
        int fieldIndex = containingGroupType.getFieldIndex(subtype.getName());
        TypeInfo fieldTypeInfo = getFieldTypeIgnoreCase(hiveTypeInfo, subtype.getName(), fieldIndex);
        converters[i] = getFieldConverter(subtype, fieldIndex, fieldTypeInfo);
      } else {
        throw new IllegalStateException("Group type [" + containingGroupType +
            "] does not contain requested field: " + subtype);
      }
    }
  }

  /**
   * Checks whether {@code subtype} is contained in {@code groupType}. For
   * nested groups the check recurses, so a projected subset of a nested struct
   * is still accepted.
   */
  private boolean isSubType(final GroupType groupType, final Type subtype) {
    if (subtype.isPrimitive() || subtype.isRepetition(Type.Repetition.REPEATED)) {
      // Primitive and repeated fields must match exactly.
      return groupType.getFields().contains(subtype);
    }
    for (Type candidate : groupType.getFields()) {
      if (candidate.isPrimitive() || !candidate.getName().equals(subtype.getName())) {
        continue;
      }
      // Accept the candidate only if every selected sub-field is (recursively)
      // present in it. An empty selected group matches nothing — containsAll
      // starts false — preserving the historical behavior of this check.
      boolean containsAll = false;
      for (Type subSubType : subtype.asGroupType().getFields()) {
        containsAll = isSubType(candidate.asGroupType(), subSubType);
        if (!containsAll) {
          break;
        }
      }
      if (containsAll) {
        return true;
      }
    }
    return false;
  }

  /**
   * Resolves the Hive type for a Parquet field. STRUCT fields are matched by
   * case-insensitive name (or by index, in index-access mode); for a MAP,
   * index 0 is the key type and index 1 the value type — this covers Hive
   * tables declared as map&lt;k,v&gt; backed by files written as
   * array&lt;struct&lt;k,v&gt;&gt;.
   *
   * @return the matching Hive type, or null when the field is not needed
   * @throws RuntimeException for Hive categories other than STRUCT/MAP
   */
  private TypeInfo getFieldTypeIgnoreCase(TypeInfo hiveTypeInfo, String fieldName, int fieldIndex) {
    if (hiveTypeInfo == null) {
      return null;
    } else if (hiveTypeInfo.getCategory() == ObjectInspector.Category.STRUCT) {
      return getStructFieldTypeInfo(fieldName, fieldIndex);
    } else if (hiveTypeInfo.getCategory() == ObjectInspector.Category.MAP) {
      if (fieldIndex == 0) {
        return ((MapTypeInfo) hiveTypeInfo).getMapKeyTypeInfo();
      } else if (fieldIndex == 1) {
        return ((MapTypeInfo) hiveTypeInfo).getMapValueTypeInfo();
      } else {
        // Any additional fields are skipped for the map case.
        return null;
      }
    }
    throw new RuntimeException("Unknown hive type info " + hiveTypeInfo + " when searching for field " + fieldName);
  }

  /**
   * Looks up the Hive struct field type for a file-schema field, either
   * positionally (index-access mode) or by case-insensitive name.
   */
  private TypeInfo getStructFieldTypeInfo(String field, int fieldIndex) {
    if (Boolean.parseBoolean(getMetadata().get(DataWritableReadSupport.PARQUET_COLUMN_INDEX_ACCESS))
        && fieldIndex < hiveFieldNames.size()) {
      // Access-by-index mode: positional lookup, names are ignored.
      return hiveFieldTypeInfos.get(fieldIndex);
    }
    for (int i = 0; i < hiveFieldNames.size(); i++) {
      // equalsIgnoreCase already compares case-insensitively, so the previous
      // default-locale toLowerCase() was redundant (and a Turkish-I hazard).
      if (field.equalsIgnoreCase(hiveFieldNames.get(i))) {
        return hiveFieldTypeInfos.get(i);
      }
    }
    // The file schema carries a field the Hive table no longer declares
    // (schema evolution, e.g. a dropped column): not an error, just skipped.
    return null;
  }

  /**
   * Creates the converter for one field; repeated fields get a Repeated
   * wrapper that is also registered for per-record start/end callbacks.
   */
  private Converter getFieldConverter(Type type, int fieldIndex, TypeInfo hiveTypeInfo) {
    Converter converter;
    if (type.isRepetition(Type.Repetition.REPEATED)) {
      if (type.isPrimitive()) {
        converter = new Repeated.RepeatedPrimitiveConverter(
            type.asPrimitiveType(), this, fieldIndex, hiveTypeInfo);
      } else {
        // A repeated group is a Hive LIST; pass the element type down.
        converter = new Repeated.RepeatedGroupConverter(
            type.asGroupType(), this, fieldIndex, hiveTypeInfo == null ? null
                : ((ListTypeInfo) hiveTypeInfo).getListElementTypeInfo());
      }
      repeatedConverters.add((Repeated) converter);
    } else {
      converter = getConverterFromDescription(type, fieldIndex, this, hiveTypeInfo);
    }
    return converter;
  }

  /** Returns the current record as an ArrayWritable over the field values. */
  public final ArrayWritable getCurrentArray() {
    return new ArrayWritable(Writable.class, writables);
  }

  @Override
  public void set(int fieldIndex, Writable value) {
    writables[fieldIndex] = value;
  }

  @Override
  public Converter getConverter(final int fieldIndex) {
    return converters[fieldIndex];
  }

  @Override
  public void start() {
    if (reuseWritableArray) {
      // Root converter: reset the shared array instead of re-allocating.
      Arrays.fill(writables, null);
    } else {
      this.writables = new Writable[totalFieldCount];
    }
    for (Repeated repeated : repeatedConverters) {
      repeated.parentStart();
    }
  }

  @Override
  public void end() {
    for (Repeated repeated : repeatedConverters) {
      repeated.parentEnd();
    }
    if (parent != null) {
      parent.set(index, getCurrentArray());
    }
  }
}