/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.avro;
import java.util.ArrayList;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
/**
* An AvroObjectInspectorGenerator takes an Avro schema and creates the three
* data structures Hive needs to work with Avro-encoded data:
* * A list of the schema field names
* * A list of those fields' equivalent types in Hive
* * An ObjectInspector capable of working with an instance of that datum.
*/
public class AvroObjectInspectorGenerator {
  /** Names of the record's fields, in the order the schema lists them. */
  private final List<String> columnNames;
  /** Hive types for the record's fields, as produced by SchemaToTypeInfo. */
  private final List<TypeInfo> columnTypes;
  /** Field doc strings, in field order; fields without a doc contribute "". */
  private final List<String> columnComments;
  /** Struct ObjectInspector built from the names, types and comments above. */
  private final ObjectInspector oi;

  /**
   * Builds the column names, column types, column comments and the struct
   * ObjectInspector for the given Avro schema.
   *
   * @param schema the Avro schema of the table; must be of type RECORD
   * @throws SerDeException if the schema is null or not a RECORD, if the number
   *         of generated column names and column types differ, or if an
   *         ObjectInspector cannot be created for one of the column types
   */
  public AvroObjectInspectorGenerator(Schema schema) throws SerDeException {
    verifySchemaIsARecord(schema);

    this.columnNames = generateColumnNames(schema);
    this.columnTypes = SchemaToTypeInfo.generateColumnTypes(schema);
    this.columnComments = generateColumnComments(schema);

    // Checked explicitly rather than with `assert`: assertions are disabled by
    // default, and a mismatch would otherwise surface later as an unrelated
    // failure while the struct inspector is being assembled.
    if (columnNames.size() != columnTypes.size()) {
      throw new AvroSerdeException("Number of column names (" + columnNames.size()
          + ") does not match number of column types (" + columnTypes.size() + ")");
    }

    this.oi = createObjectInspector();
  }

  /**
   * Ensures the top-level schema is a non-null Avro RECORD.
   *
   * @param schema the schema to check
   * @throws SerDeException if the schema is null or its type is not RECORD
   */
  private void verifySchemaIsARecord(Schema schema) throws SerDeException {
    if (schema == null) {
      throw new AvroSerdeException("Schema for table must be of type RECORD. " +
          "Received a null schema");
    }
    if (schema.getType() != Schema.Type.RECORD) {
      throw new AvroSerdeException("Schema for table must be of type RECORD. " +
          "Received type: " + schema.getType());
    }
  }

  /**
   * @return the field names of the schema; this is the generator's own list,
   *         not a copy
   */
  public List<String> getColumnNames() {
    return columnNames;
  }

  /**
   * @return the Hive types of the schema's fields; this is the generator's own
   *         list, not a copy
   */
  public List<TypeInfo> getColumnTypes() {
    return columnTypes;
  }

  /**
   * @return the struct ObjectInspector created for the schema
   */
  public ObjectInspector getObjectInspector() {
    return oi;
  }

  /**
   * Creates the top-level struct ObjectInspector from the column names, the
   * per-column inspectors and the column comments.
   *
   * @return the standard struct ObjectInspector for the record
   * @throws SerDeException if an inspector cannot be created for a column type
   */
  private ObjectInspector createObjectInspector() throws SerDeException {
    // The constructor has already verified that names and types line up.
    List<ObjectInspector> columnOIs = createObjectInspectors(columnTypes);
    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs, columnComments);
  }

  /**
   * Creates one ObjectInspector per TypeInfo, preserving the input order.
   *
   * @param typeInfos the types to create inspectors for
   * @return a new list holding the inspector for each type, in the same order
   * @throws SerDeException if any of the types is not supported
   */
  private List<ObjectInspector> createObjectInspectors(List<TypeInfo> typeInfos)
      throws SerDeException {
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(typeInfos.size());
    for (TypeInfo typeInfo : typeInfos) {
      inspectors.add(createObjectInspectorWorker(typeInfo));
    }
    return inspectors;
  }

  /**
   * Recursively creates the ObjectInspector matching the given Hive type.
   *
   * @param ti the Hive type to create an inspector for
   * @return the ObjectInspector for {@code ti}
   * @throws SerDeException if the category of {@code ti} (or of a nested type)
   *         is not one of PRIMITIVE, STRUCT, MAP, LIST or UNION
   */
  private ObjectInspector createObjectInspectorWorker(TypeInfo ti) throws SerDeException {
    // We don't need to do the check for U[T,Null] here because we'll give the real type
    // at deserialization and the object inspector will never see the actual union.
    if (!supportedCategories(ti)) {
      throw new AvroSerdeException("Don't yet support this type: " + ti);
    }

    ObjectInspector result;
    switch (ti.getCategory()) {
    case PRIMITIVE:
      PrimitiveTypeInfo pti = (PrimitiveTypeInfo) ti;
      result = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti);
      break;
    case STRUCT:
      StructTypeInfo sti = (StructTypeInfo) ti;
      List<ObjectInspector> fieldOIs = createObjectInspectors(sti.getAllStructFieldTypeInfos());
      result = ObjectInspectorFactory
          .getStandardStructObjectInspector(sti.getAllStructFieldNames(), fieldOIs);
      break;
    case MAP:
      MapTypeInfo mti = (MapTypeInfo) ti;
      // The key inspector is always the Java string inspector; only the value
      // type of the map is consulted.
      result = ObjectInspectorFactory.getStandardMapObjectInspector(
          PrimitiveObjectInspectorFactory
              .getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING),
          createObjectInspectorWorker(mti.getMapValueTypeInfo()));
      break;
    case LIST:
      ListTypeInfo lti = (ListTypeInfo) ti;
      result = ObjectInspectorFactory
          .getStandardListObjectInspector(createObjectInspectorWorker(lti.getListElementTypeInfo()));
      break;
    case UNION:
      UnionTypeInfo uti = (UnionTypeInfo) ti;
      List<ObjectInspector> unionObjectInspectors =
          createObjectInspectors(uti.getAllUnionObjectTypeInfos());
      result = ObjectInspectorFactory.getStandardUnionObjectInspector(unionObjectInspectors);
      break;
    default:
      throw new AvroSerdeException("No Hive categories matched: " + ti);
    }

    return result;
  }

  /**
   * Tells whether an ObjectInspector can be created for the given type.
   *
   * @param ti the Hive type to check
   * @return true if the category of {@code ti} is PRIMITIVE, MAP, LIST, STRUCT
   *         or UNION
   */
  private boolean supportedCategories(TypeInfo ti) {
    final ObjectInspector.Category c = ti.getCategory();
    return c == ObjectInspector.Category.PRIMITIVE ||
        c == ObjectInspector.Category.MAP ||
        c == ObjectInspector.Category.LIST ||
        c == ObjectInspector.Category.STRUCT ||
        c == ObjectInspector.Category.UNION;
  }

  /**
   * Collects the names of the schema's fields.
   *
   * @param schema the schema whose fields are read
   * @return a new list with the name of each field, in field order
   */
  public static List<String> generateColumnNames(Schema schema) {
    List<Schema.Field> fields = schema.getFields();
    List<String> fieldsList = new ArrayList<String>(fields.size());

    for (Schema.Field field : fields) {
      fieldsList.add(field.name());
    }

    return fieldsList;
  }

  /**
   * Collects the doc strings of the schema's fields.
   *
   * @param schema the schema whose fields are read
   * @return a new list with the doc of each field, in field order; a field
   *         whose doc is null contributes an empty string
   */
  public static List<String> generateColumnComments(Schema schema) {
    List<Schema.Field> fields = schema.getFields();
    List<String> fieldComments = new ArrayList<String>(fields.size());

    for (Schema.Field field : fields) {
      String doc = field.doc();
      fieldComments.add(doc == null ? "" : doc);
    }

    return fieldComments;
  }
}