/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.util.orc;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.junit.Test;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
* Unit tests for the NiFiOrcUtils helper class
*/
public class TestNiFiOrcUtils {
@Test
public void test_getOrcField_primitive() throws Exception {
// Expected ORC types
TypeInfo[] expectedTypes = {
TypeInfoFactory.getPrimitiveTypeInfo("int"),
TypeInfoFactory.getPrimitiveTypeInfo("bigint"),
TypeInfoFactory.getPrimitiveTypeInfo("boolean"),
TypeInfoFactory.getPrimitiveTypeInfo("float"),
TypeInfoFactory.getPrimitiveTypeInfo("double"),
TypeInfoFactory.getPrimitiveTypeInfo("binary"),
TypeInfoFactory.getPrimitiveTypeInfo("string")
};
// Build a fake Avro record with all types
Schema testSchema = buildPrimitiveAvroSchema();
List<Schema.Field> fields = testSchema.getFields();
for (int i = 0; i < fields.size(); i++) {
assertEquals(expectedTypes[i], NiFiOrcUtils.getOrcField(fields.get(i).schema()));
}
}
@Test
public void test_getOrcField_union_optional_type() throws Exception {
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
builder.name("union").type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault();
Schema testSchema = builder.endRecord();
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("union").schema());
assertEquals(TypeInfoCreator.createBoolean(), orcType);
}
@Test
public void test_getOrcField_union() throws Exception {
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
builder.name("union").type().unionOf().intType().and().booleanType().endUnion().noDefault();
Schema testSchema = builder.endRecord();
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("union").schema());
assertEquals(
TypeInfoFactory.getUnionTypeInfo(Arrays.asList(
TypeInfoCreator.createInt(),
TypeInfoCreator.createBoolean())),
orcType);
}
@Test
public void test_getOrcField_map() throws Exception {
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
builder.name("map").type().map().values().doubleType().noDefault();
Schema testSchema = builder.endRecord();
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("map").schema());
assertEquals(
TypeInfoFactory.getMapTypeInfo(
TypeInfoCreator.createString(),
TypeInfoCreator.createDouble()),
orcType);
}
@Test
public void test_getOrcField_nested_map() throws Exception {
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
builder.name("map").type().map().values().map().values().doubleType().noDefault();
Schema testSchema = builder.endRecord();
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("map").schema());
assertEquals(
TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(),
TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createDouble())),
orcType);
}
@Test
public void test_getOrcField_array() throws Exception {
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
builder.name("array").type().array().items().longType().noDefault();
Schema testSchema = builder.endRecord();
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("array").schema());
assertEquals(
TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createLong()),
orcType);
}
@Test
public void test_getOrcField_complex_array() throws Exception {
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
builder.name("array").type().array().items().map().values().floatType().noDefault();
Schema testSchema = builder.endRecord();
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("array").schema());
assertEquals(
TypeInfoFactory.getListTypeInfo(TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createFloat())),
orcType);
}
@Test
public void test_getOrcField_record() throws Exception {
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
builder.name("int").type().intType().noDefault();
builder.name("long").type().longType().longDefault(1L);
builder.name("array").type().array().items().stringType().noDefault();
Schema testSchema = builder.endRecord();
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema);
assertEquals(
TypeInfoFactory.getStructTypeInfo(
Arrays.asList("int", "long", "array"),
Arrays.asList(
TypeInfoCreator.createInt(),
TypeInfoCreator.createLong(),
TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createString()))),
orcType);
}
@Test
public void test_getOrcField_enum() throws Exception {
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
builder.name("enumField").type().enumeration("enum").symbols("a", "b", "c").enumDefault("a");
Schema testSchema = builder.endRecord();
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("enumField").schema());
assertEquals(TypeInfoCreator.createString(), orcType);
}
@Test
public void test_getPrimitiveOrcTypeFromPrimitiveAvroType() throws Exception {
// Expected ORC types
TypeInfo[] expectedTypes = {
TypeInfoCreator.createInt(),
TypeInfoCreator.createLong(),
TypeInfoCreator.createBoolean(),
TypeInfoCreator.createFloat(),
TypeInfoCreator.createDouble(),
TypeInfoCreator.createBinary(),
TypeInfoCreator.createString(),
};
Schema testSchema = buildPrimitiveAvroSchema();
List<Schema.Field> fields = testSchema.getFields();
for (int i = 0; i < fields.size(); i++) {
assertEquals(expectedTypes[i], NiFiOrcUtils.getPrimitiveOrcTypeFromPrimitiveAvroType(fields.get(i).schema().getType()));
}
}
@Test(expected = IllegalArgumentException.class)
public void test_getPrimitiveOrcTypeFromPrimitiveAvroType_badType() throws Exception {
Schema.Type nonPrimitiveType = Schema.Type.ARRAY;
NiFiOrcUtils.getPrimitiveOrcTypeFromPrimitiveAvroType(nonPrimitiveType);
}
@Test
public void test_getWritable() throws Exception {
assertTrue(NiFiOrcUtils.convertToORCObject(null, 1) instanceof IntWritable);
assertTrue(NiFiOrcUtils.convertToORCObject(null, 1L) instanceof LongWritable);
assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0f) instanceof FloatWritable);
assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0) instanceof DoubleWritable);
assertTrue(NiFiOrcUtils.convertToORCObject(null, new int[]{1, 2, 3}) instanceof List);
assertTrue(NiFiOrcUtils.convertToORCObject(null, Arrays.asList(1, 2, 3)) instanceof List);
Map<String, Float> map = new HashMap<>();
map.put("Hello", 1.0f);
map.put("World", 2.0f);
Object writable = NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("map<string,float>"), map);
assertTrue(writable instanceof MapWritable);
MapWritable mapWritable = (MapWritable) writable;
mapWritable.forEach((key, value) -> {
assertTrue(key instanceof Text);
assertTrue(value instanceof FloatWritable);
});
}
@Test
public void test_getHiveTypeFromAvroType_primitive() throws Exception {
// Expected ORC types
String[] expectedTypes = {
"INT",
"BIGINT",
"BOOLEAN",
"FLOAT",
"DOUBLE",
"BINARY",
"STRING",
};
Schema testSchema = buildPrimitiveAvroSchema();
List<Schema.Field> fields = testSchema.getFields();
for (int i = 0; i < fields.size(); i++) {
assertEquals(expectedTypes[i], NiFiOrcUtils.getHiveTypeFromAvroType(fields.get(i).schema()));
}
}
@Test
public void test_getHiveTypeFromAvroType_complex() throws Exception {
// Expected ORC types
String[] expectedTypes = {
"INT",
"MAP<STRING, DOUBLE>",
"STRING",
"UNIONTYPE<BIGINT, FLOAT>",
"ARRAY<INT>"
};
Schema testSchema = buildComplexAvroSchema();
List<Schema.Field> fields = testSchema.getFields();
for (int i = 0; i < fields.size(); i++) {
assertEquals(expectedTypes[i], NiFiOrcUtils.getHiveTypeFromAvroType(fields.get(i).schema()));
}
assertEquals("STRUCT<myInt:INT, myMap:MAP<STRING, DOUBLE>, myEnum:STRING, myLongOrFloat:UNIONTYPE<BIGINT, FLOAT>, myIntList:ARRAY<INT>>",
NiFiOrcUtils.getHiveTypeFromAvroType(testSchema));
}
@Test
public void test_generateHiveDDL_primitive() throws Exception {
Schema avroSchema = buildPrimitiveAvroSchema();
String ddl = NiFiOrcUtils.generateHiveDDL(avroSchema, "myHiveTable");
assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS myHiveTable (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)"
+ " STORED AS ORC", ddl);
}
@Test
public void test_generateHiveDDL_complex() throws Exception {
Schema avroSchema = buildComplexAvroSchema();
String ddl = NiFiOrcUtils.generateHiveDDL(avroSchema, "myHiveTable");
assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS myHiveTable "
+ "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)"
+ " STORED AS ORC", ddl);
}
//////////////////
// Helper methods
//////////////////
public static Schema buildPrimitiveAvroSchema() {
// Build a fake Avro record with all primitive types
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("test.record").namespace("any.data").fields();
builder.name("int").type().intType().noDefault();
builder.name("long").type().longType().longDefault(1L);
builder.name("boolean").type().booleanType().booleanDefault(true);
builder.name("float").type().floatType().floatDefault(0.0f);
builder.name("double").type().doubleType().doubleDefault(0.0);
builder.name("bytes").type().bytesType().noDefault();
builder.name("string").type().stringType().stringDefault("default");
return builder.endRecord();
}
public static GenericData.Record buildPrimitiveAvroRecord(int i, long l, boolean b, float f, double d, ByteBuffer bytes, String string) {
Schema schema = buildPrimitiveAvroSchema();
GenericData.Record row = new GenericData.Record(schema);
row.put("int", i);
row.put("long", l);
row.put("boolean", b);
row.put("float", f);
row.put("double", d);
row.put("bytes", bytes);
row.put("string", string);
return row;
}
public static TypeInfo buildPrimitiveOrcSchema() {
return TypeInfoFactory.getStructTypeInfo(Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string"),
Arrays.asList(
TypeInfoCreator.createInt(),
TypeInfoCreator.createLong(),
TypeInfoCreator.createBoolean(),
TypeInfoCreator.createFloat(),
TypeInfoCreator.createDouble(),
TypeInfoCreator.createBinary(),
TypeInfoCreator.createString()));
}
public static Schema buildComplexAvroSchema() {
// Build a fake Avro record with nested types
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("complex.record").namespace("any.data").fields();
builder.name("myInt").type().unionOf().nullType().and().intType().endUnion().nullDefault();
builder.name("myMap").type().map().values().doubleType().noDefault();
builder.name("myEnum").type().enumeration("myEnum").symbols("ABC", "DEF", "XYZ").enumDefault("ABC");
builder.name("myLongOrFloat").type().unionOf().longType().and().floatType().endUnion().noDefault();
builder.name("myIntList").type().array().items().intType().noDefault();
return builder.endRecord();
}
public static GenericData.Record buildComplexAvroRecord(Integer i, Map<String, Double> m, String e, Object unionVal, List<Integer> intArray) {
Schema schema = buildComplexAvroSchema();
GenericData.Record row = new GenericData.Record(schema);
row.put("myInt", i);
row.put("myMap", m);
row.put("myEnum", e);
row.put("myLongOrFloat", unionVal);
row.put("myIntList", intArray);
return row;
}
public static TypeInfo buildComplexOrcSchema() {
return TypeInfoUtils.getTypeInfoFromTypeString("struct<myInt:int,myMap:map<string,double>,myEnum:string,myLongOrFloat:uniontype<int>,myIntList:array<int>>");
}
private static class TypeInfoCreator {
static TypeInfo createInt() {
return TypeInfoFactory.getPrimitiveTypeInfo("int");
}
static TypeInfo createLong() {
return TypeInfoFactory.getPrimitiveTypeInfo("bigint");
}
static TypeInfo createBoolean() {
return TypeInfoFactory.getPrimitiveTypeInfo("boolean");
}
static TypeInfo createFloat() {
return TypeInfoFactory.getPrimitiveTypeInfo("float");
}
static TypeInfo createDouble() {
return TypeInfoFactory.getPrimitiveTypeInfo("double");
}
static TypeInfo createBinary() {
return TypeInfoFactory.getPrimitiveTypeInfo("binary");
}
static TypeInfo createString() {
return TypeInfoFactory.getPrimitiveTypeInfo("string");
}
}
}