/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package gobblin.data.management.conversion.hive.util;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.avro.Schema;
import org.apache.hadoop.hive.metastore.api.Table;
import org.testng.Assert;
import org.testng.annotations.Test;

import com.google.common.base.Optional;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;

import gobblin.data.management.ConversionHiveTestUtils;
import gobblin.data.management.conversion.hive.query.HiveAvroORCQueryGenerator;
import gobblin.util.AvroFlattener;


/**
 * Unit tests for {@link HiveAvroORCQueryGenerator}: DDL / DML generation for various nested
 * Avro schema shapes, partitioned tables, schema-evolution type checks, partition drop / create
 * statements, view create-or-update statements, and Hive type escaping.
 *
 * Expected DDL / DML text is read from resource files under {@link #resourceDir}.
 */
@Test(groups = { "gobblin.data.management.conversion" })
public class HiveAvroORCQueryGeneratorTest {

  private static String resourceDir = "avroToOrcQueryUtilsTest";
  private static Optional<Table> destinationTableMeta = Optional.absent();
  private static boolean isEvolutionEnabled = true;
  private static Optional<Integer> rowLimit = Optional.absent();

  /**
   * Generates CREATE TABLE DDL for the given schema with the given partition spec and every
   * other optional argument absent. Centralizes the 15-argument call that was previously
   * duplicated in each DDL test.
   *
   * @param schema         Avro schema to generate DDL for
   * @param schemaName     table name (also used to derive the warehouse location)
   * @param partitionsInfo optional partition column name to Hive type mapping
   * @return the generated DDL text
   */
  private static String generateDDL(Schema schema, String schemaName,
      Optional<Map<String, String>> partitionsInfo) {
    return HiveAvroORCQueryGenerator.generateCreateTableDDL(schema, schemaName,
        "file:/user/hive/warehouse/" + schemaName,
        Optional.<String>absent(),
        partitionsInfo,
        Optional.<List<String>>absent(),
        Optional.<Map<String, HiveAvroORCQueryGenerator.COLUMN_SORT_ORDER>>absent(),
        Optional.<Integer>absent(),
        Optional.<String>absent(),
        Optional.<String>absent(),
        Optional.<String>absent(),
        null,
        isEvolutionEnabled,
        destinationTableMeta,
        new HashMap<String, String>());
  }

  /** Convenience overload of {@link #generateDDL(Schema, String, Optional)} with no partitions. */
  private static String generateDDL(Schema schema, String schemaName) {
    return generateDDL(schema, schemaName, Optional.<Map<String, String>>absent());
  }

  /**
   * Generates Avro-to-ORC mapping DML for the given schemas with the given partition spec and
   * row limit, and every other optional argument absent. Centralizes the 12-argument call that
   * was previously duplicated in each DML test.
   *
   * @param schema          source (nested) Avro schema
   * @param flattenedSchema flattened Avro schema for the ORC target
   * @param schemaName      source table name; target is {@code schemaName + "_orc"}
   * @param partitionsInfo  optional partition column name to value mapping
   * @param limit           optional row limit for the generated query
   * @return the generated DML text
   */
  private static String generateDML(Schema schema, Schema flattenedSchema, String schemaName,
      Optional<Map<String, String>> partitionsInfo, Optional<Integer> limit) {
    return HiveAvroORCQueryGenerator.generateTableMappingDML(schema, flattenedSchema, schemaName,
        schemaName + "_orc",
        Optional.<String>absent(),
        Optional.<String>absent(),
        partitionsInfo,
        Optional.<Boolean>absent(),
        Optional.<Boolean>absent(),
        isEvolutionEnabled,
        destinationTableMeta,
        limit);
  }

  /***
   * Test DDL generation for schema structured as: Array within record within array within record
   * @throws IOException
   */
  @Test
  public void testArrayWithinRecordWithinArrayWithinRecordDDL() throws IOException {
    String schemaName = "testArrayWithinRecordWithinArrayWithinRecordDDL";
    Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir,
        "arrayWithinRecordWithinArrayWithinRecord_nested.json");

    String q = generateDDL(schema, schemaName);

    Assert.assertEquals(q,
        ConversionHiveTestUtils.readQueryFromFile(resourceDir,
            "arrayWithinRecordWithinArrayWithinRecord_nested.ddl"));
  }

  /***
   * Test DDL generation for schema structured as: option within option within record
   * @throws IOException
   */
  @Test
  public void testOptionWithinOptionWithinRecordDDL() throws IOException {
    String schemaName = "testOptionWithinOptionWithinRecordDDL";
    Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir,
        "optionWithinOptionWithinRecord_nested.json");

    String q = generateDDL(schema, schemaName);

    Assert.assertEquals(q,
        ConversionHiveTestUtils.readQueryFromFile(resourceDir,
            "optionWithinOptionWithinRecord_nested.ddl"));
  }

  /***
   * Test DDL generation for schema structured as: record within option within record
   * @throws IOException
   */
  @Test
  public void testRecordWithinOptionWithinRecordDDL() throws IOException {
    String schemaName = "testRecordWithinOptionWithinRecordDDL";
    Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir,
        "recordWithinOptionWithinRecord_nested.json");

    String q = generateDDL(schema, schemaName);

    Assert.assertEquals(q.trim(),
        ConversionHiveTestUtils.readQueryFromFile(resourceDir,
            "recordWithinOptionWithinRecord_nested.ddl"));
  }

  /***
   * Test DDL generation for schema structured as: record within record within record
   * @throws IOException
   */
  @Test
  public void testRecordWithinRecordWithinRecordDDL() throws IOException {
    String schemaName = "testRecordWithinRecordWithinRecordDDL";
    Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir,
        "recordWithinRecordWithinRecord_nested.json");

    String q = generateDDL(schema, schemaName);

    Assert.assertEquals(q.trim(),
        ConversionHiveTestUtils.readQueryFromFile(resourceDir,
            "recordWithinRecordWithinRecord_nested.ddl"));
  }

  /***
   * Test DDL generation for schema structured as: record within record within record after flattening
   * @throws IOException
   */
  @Test
  public void testRecordWithinRecordWithinRecordFlattenedDDL() throws IOException {
    String schemaName = "testRecordWithinRecordWithinRecordDDL";
    Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir,
        "recordWithinRecordWithinRecord_nested.json");

    AvroFlattener avroFlattener = new AvroFlattener();
    Schema flattenedSchema = avroFlattener.flatten(schema, true);

    String q = generateDDL(flattenedSchema, schemaName);

    Assert.assertEquals(q,
        ConversionHiveTestUtils.readQueryFromFile(resourceDir,
            "recordWithinRecordWithinRecord_flattened.ddl"));
  }

  /***
   * Test DML generation
   * @throws IOException
   */
  @Test
  public void testRecordWithinRecordWithinRecordFlattenedDML() throws IOException {
    String schemaName = "testRecordWithinRecordWithinRecordDDL";
    Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir,
        "recordWithinRecordWithinRecord_nested.json");

    AvroFlattener avroFlattener = new AvroFlattener();
    Schema flattenedSchema = avroFlattener.flatten(schema, true);

    String q = generateDML(schema, flattenedSchema, schemaName,
        Optional.<Map<String, String>>absent(), rowLimit);

    Assert.assertEquals(q.trim(),
        ConversionHiveTestUtils.readQueryFromFile(resourceDir,
            "recordWithinRecordWithinRecord.dml"));
  }

  /***
   * Test Multi-partition DDL generation
   * @throws IOException
   */
  @Test
  public void testMultiPartitionDDL() throws IOException {
    String schemaName = "testMultiPartitionDDL";
    Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir,
        "recordWithinRecordWithinRecord_nested.json");

    AvroFlattener avroFlattener = new AvroFlattener();
    Schema flattenedSchema = avroFlattener.flatten(schema, true);

    Map<String, String> partitionDDLInfo =
        ImmutableMap.of("datepartition", "string", "id", "int", "country", "string");

    String q = generateDDL(flattenedSchema, schemaName, Optional.of(partitionDDLInfo));

    Assert.assertEquals(q,
        ConversionHiveTestUtils.readQueryFromFile(resourceDir, "testMultiPartition.ddl"));
  }

  /***
   * Test Multi-partition DML generation
   * @throws IOException
   */
  @Test
  public void testMultiPartitionDML() throws IOException {
    String schemaName = "testMultiPartitionDML";
    Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir,
        "recordWithinRecordWithinRecord_nested.json");

    AvroFlattener avroFlattener = new AvroFlattener();
    Schema flattenedSchema = avroFlattener.flatten(schema, true);

    Map<String, String> partitionDMLInfo =
        ImmutableMap.of("datepartition", "2016-01-01", "id", "101", "country", "US");

    String q = generateDML(schema, flattenedSchema, schemaName,
        Optional.of(partitionDMLInfo), rowLimit);

    Assert.assertEquals(q.trim(),
        ConversionHiveTestUtils.readQueryFromFile(resourceDir, "testMultiPartition.dml"));
  }

  /***
   * Test bad schema
   * @throws IOException
   */
  @Test(expectedExceptions = IllegalArgumentException.class)
  public void testNonRecordRootSchemaDDL() throws Exception {
    String schemaName = "nonRecordRootSchema";
    // Root schema must be a RECORD; a bare STRING must be rejected.
    Schema schema = Schema.create(Schema.Type.STRING);

    generateDDL(schema, schemaName);
  }

  /***
   * Test DML generation with row limit
   * @throws IOException
   */
  @Test
  public void testFlattenedDMLWithRowLimit() throws IOException {
    String schemaName = "testRecordWithinRecordWithinRecordDDL";
    Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir,
        "recordWithinRecordWithinRecord_nested.json");

    AvroFlattener avroFlattener = new AvroFlattener();
    Schema flattenedSchema = avroFlattener.flatten(schema, true);

    // Pass the limit explicitly instead of shadowing the static rowLimit field.
    String q = generateDML(schema, flattenedSchema, schemaName,
        Optional.<Map<String, String>>absent(), Optional.of(1));

    Assert.assertEquals(q.trim(),
        ConversionHiveTestUtils.readQueryFromFile(resourceDir, "flattenedWithRowLimit.dml"));
  }

  @Test
  public void testDropPartitions() throws Exception {
    // Test multiple partition-spec drop method
    List<Map<String, String>> partitionDMLInfos = Lists.newArrayList();
    partitionDMLInfos.add(ImmutableMap.of("datepartition", "2016-01-01", "sizepartition", "10"));
    partitionDMLInfos.add(ImmutableMap.of("datepartition", "2016-01-02", "sizepartition", "20"));
    partitionDMLInfos.add(ImmutableMap.of("datepartition", "2016-01-03", "sizepartition", "30"));

    List<String> ddl =
        HiveAvroORCQueryGenerator.generateDropPartitionsDDL("db1", "table1", partitionDMLInfos);
    Assert.assertEquals(ddl.size(), 2);
    Assert.assertEquals(ddl.get(0), "USE db1 \n");
    Assert.assertEquals(ddl.get(1),
        "ALTER TABLE table1 DROP IF EXISTS PARTITION (datepartition='2016-01-01',sizepartition='10'), "
            + "PARTITION (datepartition='2016-01-02',sizepartition='20'), "
            + "PARTITION (datepartition='2016-01-03',sizepartition='30')");

    // Check empty partitions
    Assert.assertEquals(
        HiveAvroORCQueryGenerator.generateDropPartitionsDDL("db1", "table1",
            Collections.<Map<String, String>>emptyList()),
        Collections.emptyList());

    // Test single partition-spec drop method
    Map<String, String> partitionsDMLInfo =
        ImmutableMap.of("datepartition", "2016-01-01", "sizepartition", "10");
    ddl = HiveAvroORCQueryGenerator.generateDropPartitionsDDL("db1", "table1", partitionsDMLInfo);
    Assert.assertEquals(ddl.size(), 2);
    Assert.assertEquals(ddl.get(0), "USE db1\n");
    Assert.assertEquals(ddl.get(1),
        "ALTER TABLE table1 DROP IF EXISTS PARTITION (`datepartition`='2016-01-01', `sizepartition`='10') ");
  }

  @Test
  public void testCreatePartitionDDL() throws Exception {
    List<String> ddl = HiveAvroORCQueryGenerator.generateCreatePartitionDDL("db1", "table1", "/tmp",
        ImmutableMap.of("datepartition", "2016-01-01", "sizepartition", "10"));

    Assert.assertEquals(ddl.size(), 2);
    Assert.assertEquals(ddl.get(0), "USE db1\n");
    Assert.assertEquals(ddl.get(1),
        "ALTER TABLE `table1` ADD IF NOT EXISTS PARTITION (`datepartition`='2016-01-01', `sizepartition`='10') \n"
            + " LOCATION '/tmp' ");
  }

  @Test
  public void testDropTableDDL() throws Exception {
    String ddl = HiveAvroORCQueryGenerator.generateDropTableDDL("db1", "table1");

    Assert.assertEquals(ddl, "DROP TABLE IF EXISTS `db1`.`table1`");
  }

  @Test
  public void testHiveTypeEscaping() throws Exception {
    String type = "array<struct<singleItems:array<struct<scoredEntity:struct<id:string,score:float,"
        + "sourceName:string,sourceModel:string>,scores:struct<fprScore:double,fprUtility:double,"
        + "calibratedFprUtility:double,sprScore:double,adjustedSprScore:double,sprUtility:double>,"
        + "sponsoredFlag:string,blendingRequestId:string,forExploration:boolean,d2Resource:string,"
        + "restliFinder:string,trackingId:binary,aggregation:struct<positionInAggregation:struct<index:int>,"
        + "typeOfAggregation:string>,decoratedFeedUpdateData:struct<avoData:struct<actorUrn:string,verbType:"
        + "string,objectUrn:string,objectType:string>,attributedActivityUrn:string,createdTime:bigint,totalLikes:"
        + "bigint,totalComments:bigint,rootActivity:struct<activityUrn:string,avoData:struct<actorUrn:string,"
        + "verbType:string,objectUrn:string,objectType:string>>>>>,scores:struct<fprScore:double,fprUtility:double,"
        + "calibratedFprUtility:double,sprScore:double,adjustedSprScore:double,sprUtility:double>,position:int>>";
    String expectedEscapedType = "array<struct<`singleItems`:array<struct<`scoredEntity`:struct<`id`:string,"
        + "`score`:float,`sourceName`:string,`sourceModel`:string>,`scores`:struct<`fprScore`:double,"
        + "`fprUtility`:double,`calibratedFprUtility`:double,`sprScore`:double,`adjustedSprScore`:double,"
        + "`sprUtility`:double>,`sponsoredFlag`:string,`blendingRequestId`:string,`forExploration`:boolean,"
        + "`d2Resource`:string,`restliFinder`:string,`trackingId`:binary,`aggregation`:struct<`positionInAggregation`"
        + ":struct<`index`:int>,`typeOfAggregation`:string>,`decoratedFeedUpdateData`:struct<`avoData`:"
        + "struct<`actorUrn`:string,`verbType`:string,`objectUrn`:string,`objectType`:string>,`attributedActivityUrn`"
        + ":string,`createdTime`:bigint,`totalLikes`:bigint,`totalComments`:bigint,`rootActivity`:struct<`activityUrn`"
        + ":string,`avoData`:struct<`actorUrn`:string,`verbType`:string,`objectUrn`:string,`objectType`:string>>>>>,"
        + "`scores`:struct<`fprScore`:double,`fprUtility`:double,`calibratedFprUtility`:double,`sprScore`:double,"
        + "`adjustedSprScore`:double,`sprUtility`:double>,`position`:int>>";

    String actualEscapedType = HiveAvroORCQueryGenerator.escapeHiveType(type);

    Assert.assertEquals(actualEscapedType, expectedEscapedType);
  }

  @Test
  public void testValidTypeEvolution() throws Exception {
    // Check a few evolved types
    Assert.assertTrue(HiveAvroORCQueryGenerator.isTypeEvolved("float", "int"));
    Assert.assertTrue(HiveAvroORCQueryGenerator.isTypeEvolved("double", "float"));
    Assert.assertTrue(HiveAvroORCQueryGenerator.isTypeEvolved("string", "varchar"));
    Assert.assertTrue(HiveAvroORCQueryGenerator.isTypeEvolved("double", "string"));

    // Check if type is same
    Assert.assertFalse(HiveAvroORCQueryGenerator.isTypeEvolved("int", "int"));
  }

  @Test(expectedExceptions = RuntimeException.class)
  public void testInvalidTypeEvolution() throws Exception {
    // Check for in-compatible types
    HiveAvroORCQueryGenerator.isTypeEvolved("boolean", "int");
  }

  @Test
  public void testCreateOrUpdateViewDDL() throws Exception {
    // Check if two queries for Create and Update View have been generated
    List<String> ddls =
        HiveAvroORCQueryGenerator.generateCreateOrUpdateViewDDL("db1", "tbl1", "db2", "view1", true);
    Assert.assertEquals(ddls.size(), 2, "Two queries for Create and Update should have been generated");
    Assert.assertEquals(ddls.get(0), "CREATE VIEW IF NOT EXISTS `db2`.`view1` AS SELECT * FROM `db1`.`tbl1`");
    Assert.assertEquals(ddls.get(1), "ALTER VIEW `db2`.`view1` AS SELECT * FROM `db1`.`tbl1`");

    // Check if two queries for Create and Update View have been generated
    ddls = HiveAvroORCQueryGenerator.generateCreateOrUpdateViewDDL("db1", "tbl1", "db2", "view1", false);
    Assert.assertEquals(ddls.size(), 1, "One query for Create only should have been generated");
    Assert.assertEquals(ddls.get(0), "CREATE VIEW IF NOT EXISTS `db2`.`view1` AS SELECT * FROM `db1`.`tbl1`");
  }
}