/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.data.management.copy.hive;

import java.io.IOException;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.mockito.Mockito;
import org.testng.Assert;
import org.testng.annotations.Test;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.Sets;
import com.typesafe.config.Config;

import gobblin.hive.HiveMetastoreClientPool;
import gobblin.util.AutoReturnableObject;


/**
 * Unit tests for {@link HiveDatasetFinder}, backed by a mocked {@link HiveMetastoreClientPool}.
 */
public class HiveDatasetFinderTest {

  /** With an empty whitelist, every table in the metastore becomes a dataset. */
  @Test
  public void testDatasetFinder() throws Exception {
    List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList();
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table1"));
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table2"));
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table3"));
    HiveMetastoreClientPool pool = getTestPool(dbAndTables);

    Properties properties = new Properties();
    properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");

    HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool);
    List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator());

    Assert.assertEquals(datasets.size(), 3);
  }

  /** A table that fails dataset creation is skipped; the remaining tables are still returned. */
  @Test
  public void testException() throws Exception {
    List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList();
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table1"));
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", TestHiveDatasetFinder.THROW_EXCEPTION));
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table3"));
    HiveMetastoreClientPool pool = getTestPool(dbAndTables);

    Properties properties = new Properties();
    properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");

    HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool);
    List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator());

    Assert.assertEquals(datasets.size(), 2);
  }

  /** Whitelisting "db1" excludes tables from every other database. */
  @Test
  public void testWhitelist() throws Exception {
    List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList();
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table1"));
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table2"));
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db2", "table1"));
    HiveMetastoreClientPool pool = getTestPool(dbAndTables);

    Properties properties = new Properties();
    properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "db1");

    HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool);
    List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator());

    Assert.assertEquals(datasets.size(), 2);
    Assert.assertEquals(datasets.get(0).getTable().getDbName(), "db1");
    Assert.assertEquals(datasets.get(1).getTable().getDbName(), "db1");
    Assert.assertEquals(Sets.newHashSet(datasets.get(0).getTable().getTableName(), datasets.get(1).getTable().getTableName()),
        Sets.newHashSet("table1", "table2"));
  }

  /** Blacklisting "db2" excludes its tables even though the empty whitelist admits everything. */
  @Test
  public void testBlacklist() throws Exception {
    List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList();
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table1"));
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table2"));
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db2", "table1"));
    HiveMetastoreClientPool pool = getTestPool(dbAndTables);

    Properties properties = new Properties();
    properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");
    properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.BLACKLIST, "db2");

    HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool);
    List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator());

    Assert.assertEquals(datasets.size(), 2);
    Assert.assertEquals(datasets.get(0).getTable().getDbName(), "db1");
    Assert.assertEquals(datasets.get(1).getTable().getDbName(), "db1");
    Assert.assertEquals(Sets.newHashSet(datasets.get(0).getTable().getTableName(), datasets.get(1).getTable().getTableName()),
        Sets.newHashSet("table1", "table2"));
  }

  /** DB_KEY plus TABLE_PATTERN_KEY selects only the matching tables in the named database. */
  @Test
  public void testTableList() throws Exception {
    List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList();
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table1"));
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table2"));
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table3"));
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db2", "table1"));
    HiveMetastoreClientPool pool = getTestPool(dbAndTables);

    Properties properties = new Properties();
    properties.put(HiveDatasetFinder.DB_KEY, "db1");
    properties.put(HiveDatasetFinder.TABLE_PATTERN_KEY, "table1|table2");

    HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool);
    List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator());

    Assert.assertEquals(datasets.size(), 2);
    Assert.assertEquals(datasets.get(0).getTable().getDbName(), "db1");
    Assert.assertEquals(datasets.get(1).getTable().getDbName(), "db1");
    Assert.assertEquals(Sets.newHashSet(datasets.get(0).getTable().getTableName(), datasets.get(1).getTable().getTableName()),
        Sets.newHashSet("table1", "table2"));
  }

  /**
   * Dataset-scoped configuration is exposed through {@link HiveDataset#getDatasetConfig()},
   * optionally re-scoped under {@link HiveDatasetFinder#HIVE_DATASET_CONFIG_PREFIX_KEY}.
   */
  @Test
  public void testDatasetConfig() throws Exception {
    List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList();
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table1"));
    HiveMetastoreClientPool pool = getTestPool(dbAndTables);

    Properties properties = new Properties();
    properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");
    properties.put("hive.dataset.test.conf1", "conf1-val1");
    properties.put("hive.dataset.test.conf2", "conf2-val2");

    HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool);
    List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator());

    Assert.assertEquals(datasets.size(), 1);
    HiveDataset hiveDataset = datasets.get(0);

    Assert.assertEquals(hiveDataset.getDatasetConfig().getString("hive.dataset.test.conf1"), "conf1-val1");
    Assert.assertEquals(hiveDataset.getDatasetConfig().getString("hive.dataset.test.conf2"), "conf2-val2");

    // Test scoped configs with prefix: keys are resolved relative to the configured prefix
    properties.put(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY, "hive.dataset.test");

    finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool);
    datasets = Lists.newArrayList(finder.getDatasetsIterator());

    Assert.assertEquals(datasets.size(), 1);
    hiveDataset = datasets.get(0);

    Assert.assertEquals(hiveDataset.getDatasetConfig().getString("conf1"), "conf1-val1");
    Assert.assertEquals(hiveDataset.getDatasetConfig().getString("conf2"), "conf2-val2");
  }

  /** Builds a mocked client pool whose metastore client serves exactly the given db/table pairs. */
  private HiveMetastoreClientPool getTestPool(List<HiveDatasetFinder.DbAndTable> dbAndTables) throws Exception {
    SetMultimap<String, String> entities = HashMultimap.create();
    for (HiveDatasetFinder.DbAndTable dbAndTable : dbAndTables) {
      entities.put(dbAndTable.getDb(), dbAndTable.getTable());
    }

    HiveMetastoreClientPool pool = Mockito.mock(HiveMetastoreClientPool.class);

    IMetaStoreClient client = Mockito.mock(IMetaStoreClient.class);
    Mockito.when(client.getAllDatabases()).thenReturn(Lists.newArrayList(entities.keySet()));
    for (String db : entities.keySet()) {
      Mockito.doReturn(Lists.newArrayList(entities.get(db))).when(client).getAllTables(db);
    }
    for (HiveDatasetFinder.DbAndTable dbAndTable : dbAndTables) {
      Table table = new Table();
      table.setDbName(dbAndTable.getDb());
      table.setTableName(dbAndTable.getTable());
      StorageDescriptor sd = new StorageDescriptor();
      sd.setLocation("/tmp/test");
      table.setSd(sd);
      Mockito.doReturn(table).when(client).getTable(dbAndTable.getDb(), dbAndTable.getTable());
    }

    @SuppressWarnings("unchecked")
    AutoReturnableObject<IMetaStoreClient> aro = Mockito.mock(AutoReturnableObject.class);
    Mockito.when(aro.get()).thenReturn(client);

    Mockito.when(pool.getHiveRegProps()).thenReturn(null);
    Mockito.when(pool.getClient()).thenReturn(aro);
    return pool;
  }

  /**
   * {@link HiveDatasetFinder} that throws an {@link IOException} when asked to create a dataset
   * for the table named {@link #THROW_EXCEPTION}, so tests can exercise the error path.
   */
  private class TestHiveDatasetFinder extends HiveDatasetFinder {

    public static final String THROW_EXCEPTION = "throw_exception";

    public TestHiveDatasetFinder(FileSystem fs, Properties properties, HiveMetastoreClientPool pool)
        throws IOException {
      super(fs, properties, pool);
    }

    @Override
    protected HiveDataset createHiveDataset(Table table, Config config) throws IOException {
      if (table.getTableName().equals(THROW_EXCEPTION)) {
        throw new IOException("bad table");
      }
      return new HiveDataset(super.fs, super.clientPool,
          new org.apache.hadoop.hive.ql.metadata.Table(table), config);
    }
  }
}