/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package gobblin.data.management.conversion.hive.dataset;

import java.io.InputStream;
import java.util.Properties;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.mockito.Mockito;
import org.testng.Assert;
import org.testng.annotations.Test;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

import gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDataset.ConversionConfig;
import gobblin.hive.HiveMetastoreClientPool;
import gobblin.util.ConfigUtils;


/**
 * Unit tests for {@link ConvertibleHiveDataset}: verifies that destination-format
 * configuration (HOCON resources and {@link Properties}) is parsed into the expected
 * {@link ConversionConfig} values, and that missing or disabled formats are absent.
 */
@Test(groups = { "gobblin.data.management.conversion" })
public class ConvertibleHiveDatasetTest {

  /**
   * A config declaring only the {@code flattenedOrc} destination format should yield
   * exactly that format with the expected conversion settings.
   */
  @Test
  public void testFlattenedOrcConfig() throws Exception {
    String testConfFilePath = "convertibleHiveDatasetTest/flattenedOrc.conf";
    Config config = ConfigFactory.parseResources(testConfFilePath).getConfig("hive.conversion.avro");
    ConvertibleHiveDataset cd = createTestConvertibleDataset(config);

    Assert.assertEquals(cd.getDestFormats(), ImmutableSet.of("flattenedOrc"));
    Assert.assertTrue(cd.getConversionConfigForFormat("flattenedOrc").isPresent());
    validateFlattenedConfig(cd.getConversionConfigForFormat("flattenedOrc").get());
  }

  /**
   * A config declaring both {@code flattenedOrc} and {@code nestedOrc} should yield
   * both formats, each with its own conversion settings.
   */
  @Test
  public void testFlattenedAndNestedOrcConfig() throws Exception {
    String testConfFilePath = "convertibleHiveDatasetTest/flattenedAndNestedOrc.conf";
    Config config = ConfigFactory.parseResources(testConfFilePath).getConfig("hive.conversion.avro");
    ConvertibleHiveDataset cd = createTestConvertibleDataset(config);

    Assert.assertEquals(cd.getDestFormats(), ImmutableSet.of("flattenedOrc", "nestedOrc"));
    Assert.assertTrue(cd.getConversionConfigForFormat("flattenedOrc").isPresent());
    Assert.assertTrue(cd.getConversionConfigForFormat("nestedOrc").isPresent());

    validateFlattenedConfig(cd.getConversionConfigForFormat("flattenedOrc").get());
    validateNestedOrc(cd.getConversionConfigForFormat("nestedOrc").get());
  }

  /**
   * Same expectations as {@link #testFlattenedAndNestedOrcConfig()}, but the configuration
   * is loaded from a {@code .properties} file and converted via {@link ConfigUtils},
   * exercising the properties-to-config code path.
   */
  @Test
  public void testFlattenedAndNestedOrcProps() throws Exception {
    String testConfFilePath = "convertibleHiveDatasetTest/flattenedAndNestedOrc.properties";

    Properties jobProps = new Properties();
    try (final InputStream stream =
        ConvertibleHiveDatasetTest.class.getClassLoader().getResourceAsStream(testConfFilePath)) {
      jobProps.load(stream);
    }

    Config config = ConfigUtils.propertiesToConfig(jobProps).getConfig("hive.conversion.avro");
    ConvertibleHiveDataset cd = createTestConvertibleDataset(config);

    Assert.assertEquals(cd.getDestFormats(), ImmutableSet.of("flattenedOrc", "nestedOrc"));
    Assert.assertTrue(cd.getConversionConfigForFormat("flattenedOrc").isPresent());
    Assert.assertTrue(cd.getConversionConfigForFormat("nestedOrc").isPresent());

    validateFlattenedConfig(cd.getConversionConfigForFormat("flattenedOrc").get());
    validateNestedOrc(cd.getConversionConfigForFormat("nestedOrc").get());
  }

  /** Asking for a format that was never configured should return an absent Optional. */
  @Test
  public void testInvalidFormat() {
    Config config =
        ConfigFactory.parseMap(ImmutableMap.<String, String>of("destinationFormats", "flattenedOrc,nestedOrc"));
    ConvertibleHiveDataset cd = createTestConvertibleDataset(config);
    Assert.assertFalse(cd.getConversionConfigForFormat("invalidFormat").isPresent());
  }

  /**
   * Only formats listed in {@code destinationFormats} are enabled: {@code nestedOrc} has
   * full destination settings here but is not listed, so its config must be absent.
   */
  @Test
  public void testDisableFormat() {
    Config config = ConfigFactory.parseMap(ImmutableMap.<String, String> builder()
        .put("destinationFormats", "flattenedOrc")
        .put("flattenedOrc.destination.tableName","d")
        .put("flattenedOrc.destination.dbName","d")
        .put("flattenedOrc.destination.dataPath","d")
        .put("nestedOrc.destination.tableName","d")
        .put("nestedOrc.destination.dbName","d")
        .put("nestedOrc.destination.dataPath","d")
        .build());
    ConvertibleHiveDataset cd = createTestConvertibleDataset(config);
    Assert.assertTrue(cd.getConversionConfigForFormat("flattenedOrc").isPresent());
    Assert.assertFalse(cd.getConversionConfigForFormat("nestedOrc").isPresent());
  }

  /** Asserts the expected flattenedOrc settings from the test resource files. */
  private void validateFlattenedConfig(ConversionConfig conversionConfig) {
    Assert.assertEquals(conversionConfig.getDestinationDbName(), "db1_flattenedOrcDb");
    Assert.assertEquals(conversionConfig.getDestinationTableName(), "tb1_flattenedOrc");
    Assert.assertEquals(conversionConfig.getDestinationDataPath(), "/tmp/data_flattenedOrc/db1/tb1");

    Assert.assertEquals(conversionConfig.getClusterBy(), ImmutableList.of("c1", "c2"));
    Assert.assertEquals(conversionConfig.getNumBuckets().get(), Integer.valueOf(4));

    Properties hiveProps = new Properties();
    hiveProps.setProperty("mapred.map.tasks", "10,12");
    hiveProps.setProperty("hive.merge.mapfiles", "false");
    Assert.assertEquals(conversionConfig.getHiveRuntimeProperties(), hiveProps);
  }

  /** Asserts the expected nestedOrc settings from the test resource files. */
  private void validateNestedOrc(ConversionConfig conversionConfig) {
    Assert.assertEquals(conversionConfig.getDestinationDbName(), "db1_nestedOrcDb");
    Assert.assertEquals(conversionConfig.getDestinationTableName(), "tb1_nestedOrc");
    Assert.assertEquals(conversionConfig.getDestinationViewName().get(), "tb1_view");
    Assert.assertEquals(conversionConfig.getDestinationDataPath(), "/tmp/data_nestedOrc/db1/tb1");
    Assert.assertEquals(conversionConfig.isUpdateViewAlwaysEnabled(), false);

    Assert.assertEquals(conversionConfig.getClusterBy(), ImmutableList.of("c3", "c4"));
    Assert.assertEquals(conversionConfig.getNumBuckets().get(), Integer.valueOf(5));

    // Fixed: the original created a Properties instance and immediately overwrote it
    // with a second `hiveProps = new Properties();` — a dead store with no effect.
    Properties hiveProps = new Properties();
    hiveProps.setProperty("mapred.map.tasks", "12");
    Assert.assertEquals(conversionConfig.getHiveRuntimeProperties(), hiveProps);
  }

  /**
   * Builds a {@link ConvertibleHiveDataset} for table {@code db1.tb1} with mocked
   * {@link FileSystem} and {@link HiveMetastoreClientPool}, driven by the given config.
   */
  public static ConvertibleHiveDataset createTestConvertibleDataset(Config config) {
    Table table = getTestTable("db1", "tb1");
    ConvertibleHiveDataset cd =
        new ConvertibleHiveDataset(Mockito.mock(FileSystem.class), Mockito.mock(HiveMetastoreClientPool.class),
            new org.apache.hadoop.hive.ql.metadata.Table(table), new Properties(), config);
    return cd;
  }

  /** Builds a minimal metastore {@link Table} with the given db/table name and a dummy location. */
  public static Table getTestTable(String dbName, String tableName) {
    Table table = new Table();
    table.setDbName(dbName);
    table.setTableName(tableName);
    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation("/tmp/test");
    table.setSd(sd);
    return table;
  }
}