/*
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.kitesdk.data.spi.hive;

import com.google.common.base.Function;
import com.google.common.collect.Lists;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.junit.Assert;
import org.junit.Test;

import javax.annotation.Nullable;
import java.util.List;

/**
 * Tests for {@code HiveSchemaConverter}: converting Avro schemas to Hive
 * {@link FieldSchema} column lists ({@code convertSchema}) and to Hive
 * {@link TypeInfo} descriptions ({@code convert}).
 */
public class TestSchemaConversion {

  // Avro record with two primitive fields; the simplest non-trivial input.
  private static final Schema SIMPLE_RECORD =
      SchemaBuilder.record("SimpleRecord").fields()
          .name("id").type().intType().noDefault()
          .name("name").type().stringType().noDefault()
          .endRecord();

  // Avro record that nests SIMPLE_RECORD inside an array field, to exercise
  // recursive conversion of complex types.
  private static final Schema COMPLEX_RECORD =
      SchemaBuilder.record("ComplexRecord").fields()
          .name("groupName").type().stringType().noDefault()
          .name("simpleRecords").type().array().items()
              .type(SIMPLE_RECORD).noDefault()
          .endRecord();

  // Projects a FieldSchema to its column name (null-safe for Guava transform).
  private static final Function<FieldSchema, String> GET_NAMES =
      new Function<FieldSchema, String>() {
        @Override
        public String apply(@Nullable FieldSchema input) {
          if (input != null) {
            return input.getName();
          } else {
            return null;
          }
        }
      };

  // Projects a FieldSchema to its Hive type string (null-safe for Guava transform).
  private static final Function<FieldSchema, String> GET_TYPE_STRINGS =
      new Function<FieldSchema, String>() {
        @Override
        public String apply(@Nullable FieldSchema input) {
          if (input != null) {
            return input.getType();
          } else {
            return null;
          }
        }
      };

  // Expected Hive TypeInfo equivalents for each Avro primitive type, taken
  // from the converter's own mapping table so the tests stay in sync with it.
  private static final TypeInfo BOOLEAN_TYPE_INFO =
      HiveSchemaConverter.TYPE_TO_TYPEINFO.get(Schema.Type.BOOLEAN);
  private static final TypeInfo INT_TYPE_INFO =
      HiveSchemaConverter.TYPE_TO_TYPEINFO.get(Schema.Type.INT);
  private static final TypeInfo LONG_TYPE_INFO =
      HiveSchemaConverter.TYPE_TO_TYPEINFO.get(Schema.Type.LONG);
  private static final TypeInfo FLOAT_TYPE_INFO =
      HiveSchemaConverter.TYPE_TO_TYPEINFO.get(Schema.Type.FLOAT);
  private static final TypeInfo DOUBLE_TYPE_INFO =
      HiveSchemaConverter.TYPE_TO_TYPEINFO.get(Schema.Type.DOUBLE);
  private static final TypeInfo STRING_TYPE_INFO =
      HiveSchemaConverter.TYPE_TO_TYPEINFO.get(Schema.Type.STRING);
  private static final TypeInfo BINARY_TYPE_INFO =
      HiveSchemaConverter.TYPE_TO_TYPEINFO.get(Schema.Type.BYTES);

  @Test
  public void testConvertSchemaWithPrimitive() {
    Schema primitiveSchema = SchemaBuilder.builder().stringType();
    List<FieldSchema> fields = HiveSchemaConverter.convertSchema(primitiveSchema);
    Assert.assertEquals("Should be a single FieldSchema",
        1, fields.size());
    Assert.assertEquals("Should be named \"column\"",
        "column", fields.get(0).getName());
    // Fixed copy-pasted message: this assertion checks the type, not the name.
    Assert.assertEquals("Should have type string",
        STRING_TYPE_INFO.toString(), fields.get(0).getType());
  }

  @Test
  public void testConvertSchemaWithSimpleRecord() {
    // convertSchema returns a list of FieldSchema objects rather than TypeInfo
    List<FieldSchema> fields = HiveSchemaConverter.convertSchema(SIMPLE_RECORD);
    Assert.assertEquals("Field names should match",
        Lists.newArrayList("id", "name"),
        Lists.transform(fields, GET_NAMES));
    Assert.assertEquals("Field types should match",
        Lists.newArrayList(
            INT_TYPE_INFO.toString(),
            STRING_TYPE_INFO.toString()),
        Lists.transform(fields, GET_TYPE_STRINGS));
  }

  @Test
  public void testConvertSchemaWithComplexRecord() {
    // convertSchema returns a list of FieldSchema objects rather than TypeInfo
    List<FieldSchema> fields = HiveSchemaConverter.convertSchema(COMPLEX_RECORD);
    Assert.assertEquals("Field names should match",
        Lists.newArrayList("groupName", "simpleRecords"),
        Lists.transform(fields, GET_NAMES));
    Assert.assertEquals("Field types should match",
        Lists.newArrayList(
            STRING_TYPE_INFO.toString(),
            TypeInfoFactory.getListTypeInfo(
                TypeInfoFactory.getStructTypeInfo(
                    Lists.newArrayList("id", "name"),
                    Lists.newArrayList(
                        INT_TYPE_INFO,
                        STRING_TYPE_INFO))).toString()),
        Lists.transform(fields, GET_TYPE_STRINGS));
  }

  @Test
  public void testSimpleRecord() {
    TypeInfo type = HiveSchemaConverter.convert(SIMPLE_RECORD);
    Assert.assertTrue("Record should be converted to struct",
        type instanceof StructTypeInfo);
    Assert.assertEquals("Field names should match",
        Lists.newArrayList("id", "name"),
        ((StructTypeInfo) type).getAllStructFieldNames());
    Assert.assertEquals("Field types should match",
        Lists.newArrayList(
            INT_TYPE_INFO,
            STRING_TYPE_INFO),
        ((StructTypeInfo) type).getAllStructFieldTypeInfos());
  }

  @Test
  public void testArray() {
    TypeInfo type = HiveSchemaConverter.convert(SchemaBuilder.array()
        .items().floatType());
    Assert.assertEquals("Array should be converted to list",
        TypeInfoFactory.getListTypeInfo(FLOAT_TYPE_INFO), type);
  }

  @Test
  public void testMap() {
    // Avro map keys are always strings, so the Hive key type must be string.
    TypeInfo type = HiveSchemaConverter.convert(SchemaBuilder.builder().map()
        .values().booleanType());
    Assert.assertEquals("Map should be converted to map",
        TypeInfoFactory.getMapTypeInfo(STRING_TYPE_INFO, BOOLEAN_TYPE_INFO),
        type);
  }

  @Test
  public void testUnion() {
    // Both bytes and fixed branches map to Hive binary.
    TypeInfo type = HiveSchemaConverter.convert(SchemaBuilder.builder().unionOf()
        .bytesType().and()
        .fixed("fixed").size(12).and()
        .doubleType().and()
        .longType()
        .endUnion());
    Assert.assertEquals("Union should be converted to union",
        TypeInfoFactory.getUnionTypeInfo(Lists.newArrayList(
            BINARY_TYPE_INFO, BINARY_TYPE_INFO,
            DOUBLE_TYPE_INFO, LONG_TYPE_INFO)),
        type);
  }

  @Test
  public void testEnum() {
    TypeInfo type = HiveSchemaConverter.convert(SchemaBuilder.builder()
        .enumeration("TestEnum").symbols("a", "b", "c"));
    Assert.assertEquals("Enum should be converted to string",
        STRING_TYPE_INFO, type);
  }

  @Test(expected = IllegalStateException.class)
  public void testRecursiveRecord() {
    // A record whose array field refers back to itself cannot be expressed as
    // a finite Hive type; the converter is expected to reject it.
    Schema recursiveRecord = SchemaBuilder.record("RecursiveRecord").fields()
        .name("name").type().stringType().noDefault()
        .name("children").type().array().items()
            .type("RecursiveRecord").noDefault()
        .endRecord();
    HiveSchemaConverter.convert(recursiveRecord);
  }
}