/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.spi.hive;
import com.google.common.collect.Lists;
import java.util.LinkedList;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.junit.Assert;
import org.junit.Test;
import org.kitesdk.data.PartitionStrategy;
import org.kitesdk.data.TestHelpers;
import static org.kitesdk.data.spi.hive.HiveSchemaConverter.NO_REQUIRED_FIELDS;
import static org.kitesdk.data.spi.hive.HiveSchemaConverter.NULL_DEFAULT;
import static org.kitesdk.data.spi.hive.HiveSchemaConverter.optional;
import static org.kitesdk.data.spi.hive.HiveSchemaConverter.parseTypeInfo;
/**
 * Tests for {@link HiveSchemaConverter}: checks the Avro {@link Schema}
 * produced for Hive primitive, array, map and struct types, and for whole
 * tables with and without a {@link PartitionStrategy}.
 *
 * <p>Every conversion in this class uses the name {@code "test"}, so
 * top-level expected records are named {@code "test"} as well.
 */
public class TestTableConversion {

  /** Hive struct that nests a second struct; shared by the struct tests. */
  private static final TypeInfo STRUCT_OF_STRUCTS_TYPE = parseTypeInfo(
      "struct<str:string,inner:struct<a:int,b:binary>>");

  /** Table columns with the same layout as {@link #STRUCT_OF_STRUCTS_TYPE}. */
  private static final List<FieldSchema> TABLE = Lists.newArrayList(
      new FieldSchema("str", "string", null),
      new FieldSchema("inner", "struct<a:int,b:binary>", null)
  );

  /** Path handed to the converter as the starting location of each conversion. */
  private final LinkedList<String> startPath = Lists.newLinkedList();

  /**
   * Parses a Hive type string and converts it with no required fields.
   *
   * @param type a Hive type string, e.g. {@code "bigint"}
   * @return the schema returned by the converter
   */
  private Schema convertPrimitive(String type) {
    return convertOptional(parseTypeInfo(type));
  }

  /**
   * Converts an already-parsed Hive type with no required fields.
   *
   * @param type the Hive type to convert
   * @return the schema returned by the converter
   */
  private Schema convertOptional(TypeInfo type) {
    return HiveSchemaConverter.convert(
        startPath, "test", type, NO_REQUIRED_FIELDS);
  }

  /** Asserts that {@code hiveType} converts to the Avro primitive {@code expected}. */
  private void assertPrimitive(Schema.Type expected, String hiveType) {
    Assert.assertEquals("Should convert " + hiveType,
        Schema.create(expected), convertPrimitive(hiveType));
  }

  /** Builds a field whose type is wrapped with {@code optional} and defaults to {@code NULL_DEFAULT}. */
  private static Schema.Field optionalField(String name, Schema type) {
    return new Schema.Field(name, optional(type), null, NULL_DEFAULT);
  }

  /** Builds a field that uses {@code type} directly and has no default value. */
  private static Schema.Field requiredField(String name, Schema type) {
    return new Schema.Field(name, type, null, null);
  }

  /**
   * Builds a record schema with the given name and fields. The fields must be
   * fresh instances: the helpers above create a new field on every call.
   */
  private static Schema record(String name, Schema.Field... fields) {
    Schema schema = Schema.createRecord(name, null, null, false);
    schema.setFields(Lists.newArrayList(fields));
    return schema;
  }

  /** Expected schema for the str/inner layout when nothing is required. */
  private static Schema expectedAllOptional() {
    Schema inner = record("inner",
        optionalField("a", Schema.create(Schema.Type.INT)),
        optionalField("b", Schema.create(Schema.Type.BYTES)));
    return record("test",
        optionalField("str", Schema.create(Schema.Type.STRING)),
        optionalField("inner", inner));
  }

  /**
   * Expected schema for the str/inner layout when {@code str} and
   * {@code inner.a} are required; {@code inner} itself is then required too,
   * while {@code inner.b} stays optional.
   */
  private static Schema expectedWithRequiredFields() {
    Schema inner = record("inner",
        requiredField("a", Schema.create(Schema.Type.INT)),
        optionalField("b", Schema.create(Schema.Type.BYTES)));
    return record("test",
        requiredField("str", Schema.create(Schema.Type.STRING)),
        requiredField("inner", inner));
  }

  /** Each supported Hive primitive maps to the expected Avro primitive. */
  @Test
  public void testConvertPrimitives() {
    assertPrimitive(Schema.Type.BOOLEAN, "boolean");
    assertPrimitive(Schema.Type.INT, "tinyint");
    assertPrimitive(Schema.Type.INT, "smallint");
    assertPrimitive(Schema.Type.INT, "int");
    assertPrimitive(Schema.Type.LONG, "bigint");
    assertPrimitive(Schema.Type.FLOAT, "float");
    assertPrimitive(Schema.Type.DOUBLE, "double");
    assertPrimitive(Schema.Type.STRING, "string");
    assertPrimitive(Schema.Type.BYTES, "binary");

    // char, varchar and decimal are only checked when the converter found the
    // corresponding Hive type-info class.
    if (HiveSchemaConverter.charClass != null) {
      assertPrimitive(Schema.Type.STRING, "char(10)");
    }

    if (HiveSchemaConverter.varcharClass != null) {
      assertPrimitive(Schema.Type.STRING, "varchar(32)");
    }

    if (HiveSchemaConverter.decimalClass != null) {
      TestHelpers.assertThrows("Should reject unknown type",
          IllegalArgumentException.class, new Runnable() {
            @Override
            public void run() {
              // Use a well-formed decimal (scale <= precision) so that the
              // rejection comes from the converter and not from Hive's own
              // validation of the type parameters.
              convertPrimitive("decimal(4,2)");
            }
          });
    }
  }

  /** Arrays of primitives, arrays, maps and structs convert recursively. */
  @Test
  public void testConvertArrays() {
    Schema arrayOfStringsSchema = Schema.createArray(
        optional(Schema.create(Schema.Type.STRING)));
    Assert.assertEquals("Should convert array of primitive",
        arrayOfStringsSchema,
        convertOptional(parseTypeInfo("array<string>")));

    Schema arrayOfArraysSchema = Schema.createArray(
        optional(arrayOfStringsSchema));
    Assert.assertEquals("Should convert array of arrays",
        arrayOfArraysSchema,
        convertOptional(parseTypeInfo("array<array<string>>")));

    Schema arrayOfMapsSchema = Schema.createArray(
        optional(Schema.createMap(optional(Schema.create(Schema.Type.FLOAT)))));
    Assert.assertEquals("Should convert array of maps",
        arrayOfMapsSchema,
        convertOptional(parseTypeInfo("array<map<string,float>>")));

    Schema recordSchema = record("test",
        optionalField("a", arrayOfArraysSchema),
        optionalField("b", arrayOfMapsSchema));
    Assert.assertEquals("Should convert array of structs",
        Schema.createArray(optional(recordSchema)),
        convertOptional(parseTypeInfo(
            "array<struct<a:array<array<string>>,b:array<map<string,float>>>>")));
  }

  /**
   * Maps of primitives, arrays, maps and structs convert recursively. Every
   * case here uses a real {@code map<string,...>} type so that the map branch
   * of the converter is exercised with each kind of value.
   */
  @Test
  public void testConvertMaps() {
    Schema mapOfLongsSchema = Schema.createMap(
        optional(Schema.create(Schema.Type.LONG)));
    Assert.assertEquals("Should convert map of primitive",
        mapOfLongsSchema,
        convertOptional(parseTypeInfo("map<string,bigint>")));

    Schema mapOfArraysSchema = Schema.createMap(
        optional(Schema.createArray(
            optional(Schema.create(Schema.Type.FLOAT)))));
    Assert.assertEquals("Should convert map of arrays",
        mapOfArraysSchema,
        convertOptional(parseTypeInfo("map<string,array<float>>")));

    Schema mapOfMapsSchema = Schema.createMap(optional(mapOfLongsSchema));
    Assert.assertEquals("Should convert map of maps",
        mapOfMapsSchema,
        convertOptional(parseTypeInfo("map<string,map<string,bigint>>")));

    Schema recordSchema = record("test",
        optionalField("a", mapOfArraysSchema),
        optionalField("b", mapOfMapsSchema));
    Assert.assertEquals("Should convert map of structs",
        Schema.createMap(optional(recordSchema)),
        convertOptional(parseTypeInfo("map<string," +
            "struct<a:map<string,array<float>>," +
            "b:map<string,map<string,bigint>>>>")));
  }

  /** A nested struct converts to nested records with all fields optional. */
  @Test
  public void testConvertStructs() {
    Assert.assertEquals("Should convert struct of structs",
        expectedAllOptional(),
        convertOptional(STRUCT_OF_STRUCTS_TYPE));
  }

  /** Fields named by the required paths are converted without the optional wrapper. */
  @Test
  public void testConvertStructWithRequiredFields() {
    Assert.assertEquals(
        "Should convert struct of structs with required fields",
        expectedWithRequiredFields(),
        HiveSchemaConverter.convert(
            startPath, "test", STRUCT_OF_STRUCTS_TYPE,
            Lists.newArrayList(
                new String[]{"test", "str"},
                new String[]{"test", "inner", "a"})));
  }

  /** A table converted without a partition strategy has only optional fields. */
  @Test
  public void testConvertTable() {
    Assert.assertEquals("Should convert table named test",
        expectedAllOptional(),
        HiveSchemaConverter.convertTable("test", TABLE, null));
  }

  /** Source fields of the partition strategy become required in the table schema. */
  @Test
  public void testConvertTableWithRequiredFields() {
    PartitionStrategy strategy = new PartitionStrategy.Builder()
        .provided("not_present", "int")
        .hash("inner.a", 16) // requires both inner and inner.a
        .identity("str")
        .build();

    Assert.assertEquals(
        "Should convert table named test with required fields",
        expectedWithRequiredFields(),
        HiveSchemaConverter.convertTable("test", TABLE, strategy));
  }
}