/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.metastore; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.PartitionSpec; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.partition.spec.CompositePartitionSpecProxy; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.security.Permission; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; /** * Test to check PartitionSpec support in HiveMetaStore. */ public class TestHiveMetaStorePartitionSpecs { private static final Logger LOG = LoggerFactory.getLogger(TestHiveMetaStorePartitionSpecs.class); private static int msPort; private static HiveConf hiveConf; private static SecurityManager securityManager; public static class NoExitSecurityManager extends SecurityManager { @Override public void checkPermission(Permission perm) { // allow anything. } @Override public void checkPermission(Permission perm, Object context) { // allow anything. } @Override public void checkExit(int status) { super.checkExit(status); throw new RuntimeException("System.exit() was called. Raising exception. "); } } @AfterClass public static void tearDown() throws Exception { LOG.info("Shutting down metastore."); System.setSecurityManager(securityManager); HiveMetaStoreClient hmsc = new HiveMetaStoreClient(hiveConf); hmsc.dropDatabase(dbName, true, true, true); } @BeforeClass public static void startMetaStoreServer() throws Exception { HiveConf metastoreConf = new HiveConf(); metastoreConf.setClass(HiveConf.ConfVars.METASTORE_EXPRESSION_PROXY_CLASS.varname, MockPartitionExpressionForMetastore.class, PartitionExpressionProxy.class); msPort = MetaStoreUtils.startMetaStore(metastoreConf); securityManager = System.getSecurityManager(); System.setSecurityManager(new NoExitSecurityManager()); hiveConf = new HiveConf(TestHiveMetaStorePartitionSpecs.class); hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort); hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); hiveConf.set(HiveConf.ConfVars.METASTORE_EXPRESSION_PROXY_CLASS.name(), MockPartitionExpressionForMetastore.class.getCanonicalName()); System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); } private static String dbName = "testpartitionspecs_db"; private static String tableName = "testpartitionspecs_table"; private static int nDates = 10; private static String datePrefix = "2014010"; private static void createTable(HiveMetaStoreClient hmsc, boolean enablePartitionGrouping) throws Exception { List<FieldSchema> columns = new ArrayList<FieldSchema>(); columns.add(new FieldSchema("foo", "string", "")); columns.add(new FieldSchema("bar", "string", "")); List<FieldSchema> partColumns = new ArrayList<FieldSchema>(); partColumns.add(new FieldSchema("dt", "string", "")); partColumns.add(new FieldSchema("blurb", "string", "")); SerDeInfo serdeInfo = new SerDeInfo("LBCSerDe", LazyBinaryColumnarSerDe.class.getCanonicalName(), new HashMap<String, String>()); StorageDescriptor storageDescriptor = new StorageDescriptor(columns, null, "org.apache.hadoop.hive.ql.io.RCFileInputFormat", "org.apache.hadoop.hive.ql.io.RCFileOutputFormat", false, 0, serdeInfo, null, null, null); Map<String, String> tableParameters = new HashMap<String, String>(); tableParameters.put("hive.hcatalog.partition.spec.grouping.enabled", enablePartitionGrouping? "true":"false"); Table table = new Table(tableName, dbName, "", 0, 0, 0, storageDescriptor, partColumns, tableParameters, "", "", ""); hmsc.createTable(table); Assert.assertTrue("Table " + dbName + "." + tableName + " does not exist", hmsc.tableExists(dbName, tableName)); } private static void clearAndRecreateDB(HiveMetaStoreClient hmsc) throws Exception { hmsc.dropDatabase(dbName, true, // Delete data. true, // Ignore unknownDB. true // Cascade. ); hmsc.createDatabase(new Database(dbName, "", // Description. null, // Location. null // Parameters. )); } // Get partition-path. For grid='XYZ', place the partition outside the table-path. private static String getPartitionPath(Table table, List<String> partValues) { return partValues.get(1).equalsIgnoreCase("isLocatedOutsideTablePath")? // i.e. Is the partition outside the table-dir? table.getSd().getLocation().replace(table.getTableName(), "location_outside_" + table.getTableName()) + "_" + partValues.get(0) + "_" + partValues.get(1) : null ; // Use defaults... Partitions are put in the table directory. } private static void populatePartitions(HiveMetaStoreClient hmsc, Table table, List<String> blurbs) throws Exception { for (int i=0; i< nDates; ++i) { for (String blurb : blurbs) { StorageDescriptor sd = new StorageDescriptor(table.getSd()); // Add partitions located in the table-directory (i.e. default). List<String> values = Arrays.asList(datePrefix + i, blurb); sd.setLocation(getPartitionPath(table, values)); hmsc.add_partition(new Partition(values, dbName, tableName, 0, 0, sd, null)); } } } private void testGetPartitionSpecs(boolean enablePartitionGrouping) { try { HiveMetaStoreClient hmsc = new HiveMetaStoreClient(hiveConf); clearAndRecreateDB(hmsc); createTable(hmsc, enablePartitionGrouping); Table table = hmsc.getTable(dbName, tableName); populatePartitions(hmsc, table, Arrays.asList("isLocatedInTablePath", "isLocatedOutsideTablePath")); PartitionSpecProxy partitionSpecProxy = hmsc.listPartitionSpecs(dbName, tableName, -1); Assert.assertEquals( "Unexpected number of partitions.", nDates * 2, partitionSpecProxy.size()); Map<String, List<String>> locationToDateMap = new HashMap<String, List<String>>(); locationToDateMap.put("isLocatedInTablePath", new ArrayList<String>()); locationToDateMap.put("isLocatedOutsideTablePath", new ArrayList<String>()); PartitionSpecProxy.PartitionIterator iterator = partitionSpecProxy.getPartitionIterator(); while (iterator.hasNext()) { Partition partition = iterator.next(); locationToDateMap.get(partition.getValues().get(1)).add(partition.getValues().get(0)); } List<String> expectedDates = new ArrayList<String>(nDates); for (int i=0; i<nDates; ++i) { expectedDates.add(datePrefix + i); } Assert.assertArrayEquals("Unexpected date-values.", expectedDates.toArray(), locationToDateMap.get("isLocatedInTablePath").toArray()); Assert.assertArrayEquals("Unexpected date-values.", expectedDates.toArray(), locationToDateMap.get("isLocatedOutsideTablePath").toArray()); partitionSpecProxy = hmsc.listPartitionSpecsByFilter(dbName, tableName, "blurb = \"isLocatedOutsideTablePath\"", -1); locationToDateMap.get("isLocatedInTablePath").clear(); locationToDateMap.get("isLocatedOutsideTablePath").clear(); iterator = partitionSpecProxy.getPartitionIterator(); while (iterator.hasNext()) { Partition partition = iterator.next(); locationToDateMap.get(partition.getValues().get(1)).add(partition.getValues().get(0)); } Assert.assertEquals("Unexpected date-values.", 0, locationToDateMap.get("isLocatedInTablePath").size()); Assert.assertArrayEquals("Unexpected date-values.", expectedDates.toArray(), locationToDateMap.get("isLocatedOutsideTablePath").toArray()); } catch (Throwable t) { LOG.error("Unexpected Exception!", t); t.printStackTrace(); Assert.assertTrue("Unexpected Exception!", false); } } /** * Test for HiveMetaStoreClient.listPartitionSpecs() and HiveMetaStoreClient.listPartitionSpecsByFilter(). * Check behaviour with and without Partition-grouping enabled. */ @Test public void testGetPartitionSpecs_WithAndWithoutPartitionGrouping() { testGetPartitionSpecs(true); testGetPartitionSpecs(false); } /** * Test to confirm that partitions can be added using PartitionSpecs. */ @Test public void testAddPartitions() { try { // Create source table. HiveMetaStoreClient hmsc = new HiveMetaStoreClient(hiveConf); clearAndRecreateDB(hmsc); createTable(hmsc, true); Table table = hmsc.getTable(dbName, tableName); populatePartitions(hmsc, table, Arrays.asList("isLocatedInTablePath", "isLocatedOutsideTablePath")); // Clone the table, String targetTableName = "cloned_" + tableName; Table targetTable = new Table(table); targetTable.setTableName(targetTableName); StorageDescriptor targetTableSd = new StorageDescriptor(targetTable.getSd()); targetTableSd.setLocation( targetTableSd.getLocation().replace( tableName, targetTableName)); hmsc.createTable(targetTable); // Get partition-list from source. PartitionSpecProxy partitionsForAddition = hmsc.listPartitionSpecsByFilter(dbName, tableName, "blurb = \"isLocatedInTablePath\"", -1); partitionsForAddition.setTableName(targetTableName); partitionsForAddition.setRootLocation(targetTableSd.getLocation()); Assert.assertEquals("Unexpected number of partitions added. ", partitionsForAddition.size(), hmsc.add_partitions_pspec(partitionsForAddition)); // Check that the added partitions are as expected. PartitionSpecProxy clonedPartitions = hmsc.listPartitionSpecs(dbName, targetTableName, -1); Assert.assertEquals("Unexpected number of partitions returned. ", partitionsForAddition.size(), clonedPartitions.size()); PartitionSpecProxy.PartitionIterator sourceIterator = partitionsForAddition.getPartitionIterator(), targetIterator = clonedPartitions.getPartitionIterator(); while (targetIterator.hasNext()) { Partition sourcePartition = sourceIterator.next(), targetPartition = targetIterator.next(); Assert.assertEquals("Mismatched values.", sourcePartition.getValues(), targetPartition.getValues()); Assert.assertEquals("Mismatched locations.", sourcePartition.getSd().getLocation(), targetPartition.getSd().getLocation()); } } catch (Throwable t) { LOG.error("Unexpected Exception!", t); t.printStackTrace(); Assert.assertTrue("Unexpected Exception!", false); } } /** * Test to confirm that Partition-grouping behaves correctly when Table-schemas evolve. * Partitions must be grouped by location and schema. */ @Test public void testFetchingPartitionsWithDifferentSchemas() { try { // Create source table. HiveMetaStoreClient hmsc = new HiveMetaStoreClient(hiveConf); clearAndRecreateDB(hmsc); createTable(hmsc, true); Table table = hmsc.getTable(dbName, tableName); populatePartitions(hmsc, table, Arrays.asList("isLocatedInTablePath", "isLocatedOutsideTablePath") // Blurb list. ); // Modify table schema. Add columns. List<FieldSchema> fields = table.getSd().getCols(); fields.add(new FieldSchema("goo", "string", "Entirely new column. Doesn't apply to older partitions.")); table.getSd().setCols(fields); hmsc.alter_table(dbName, tableName, table); // Check that the change stuck. table = hmsc.getTable(dbName,tableName); Assert.assertEquals("Unexpected number of table columns.", 3, table.getSd().getColsSize()); // Add partitions with new schema. // Mark Partitions with new schema with different blurb. populatePartitions(hmsc, table, Arrays.asList("hasNewColumn")); // Retrieve *all* partitions from the table. PartitionSpecProxy partitionSpecProxy = hmsc.listPartitionSpecs(dbName, tableName, -1); Assert.assertEquals("Unexpected number of partitions.", nDates * 3, partitionSpecProxy.size()); // Confirm grouping. Assert.assertTrue("Unexpected type of PartitionSpecProxy.", partitionSpecProxy instanceof CompositePartitionSpecProxy); CompositePartitionSpecProxy compositePartitionSpecProxy = (CompositePartitionSpecProxy)partitionSpecProxy; List<PartitionSpec> partitionSpecs = compositePartitionSpecProxy.toPartitionSpec(); Assert.assertTrue("PartitionSpec[0] should have been a SharedSDPartitionSpec.", partitionSpecs.get(0).isSetSharedSDPartitionSpec()); Assert.assertEquals("PartitionSpec[0] should use the table-path as the common root location. ", table.getSd().getLocation(), partitionSpecs.get(0).getRootPath()); Assert.assertTrue("PartitionSpec[1] should have been a SharedSDPartitionSpec.", partitionSpecs.get(1).isSetSharedSDPartitionSpec()); Assert.assertEquals("PartitionSpec[1] should use the table-path as the common root location. ", table.getSd().getLocation(), partitionSpecs.get(1).getRootPath()); Assert.assertTrue("PartitionSpec[2] should have been a ListComposingPartitionSpec.", partitionSpecs.get(2).isSetPartitionList()); // Categorize the partitions returned, and confirm that all partitions are accounted for. PartitionSpecProxy.PartitionIterator iterator = partitionSpecProxy.getPartitionIterator(); Map<String, List<Partition>> blurbToPartitionList = new HashMap<String, List<Partition>>(3); while (iterator.hasNext()) { Partition partition = iterator.next(); String blurb = partition.getValues().get(1); if (!blurbToPartitionList.containsKey(blurb)) { blurbToPartitionList.put(blurb, new ArrayList<Partition>(nDates)); } blurbToPartitionList.get(blurb).add(partition); } // </Classification> // All partitions with blurb="isLocatedOutsideTablePath" should have 2 columns, // and must have locations outside the table directory. for (Partition partition : blurbToPartitionList.get("isLocatedOutsideTablePath")) { Assert.assertEquals("Unexpected number of columns.", 2, partition.getSd().getCols().size()); Assert.assertEquals("Unexpected first column.", "foo", partition.getSd().getCols().get(0).getName()); Assert.assertEquals("Unexpected second column.", "bar", partition.getSd().getCols().get(1).getName()); String partitionLocation = partition.getSd().getLocation(); String tableLocation = table.getSd().getLocation(); Assert.assertTrue("Unexpected partition location: " + partitionLocation + ". " + "Partition should have been outside table location: " + tableLocation, !partitionLocation.startsWith(tableLocation)); } // All partitions with blurb="isLocatedInTablePath" should have 2 columns, // and must have locations within the table directory. for (Partition partition : blurbToPartitionList.get("isLocatedInTablePath")) { Assert.assertEquals("Unexpected number of columns.", 2, partition.getSd().getCols().size()); Assert.assertEquals("Unexpected first column.", "foo", partition.getSd().getCols().get(0).getName()); Assert.assertEquals("Unexpected second column.", "bar", partition.getSd().getCols().get(1).getName()); String partitionLocation = partition.getSd().getLocation(); String tableLocation = table.getSd().getLocation(); Assert.assertTrue("Unexpected partition location: " + partitionLocation + ". " + "Partition should have been within table location: " + tableLocation, partitionLocation.startsWith(tableLocation)); } // All partitions with blurb="hasNewColumn" were added after the table schema changed, // and must have 3 columns. Also, the partition locations must lie within the table directory. for (Partition partition : blurbToPartitionList.get("hasNewColumn")) { Assert.assertEquals("Unexpected number of columns.", 3, partition.getSd().getCols().size()); Assert.assertEquals("Unexpected first column.", "foo", partition.getSd().getCols().get(0).getName()); Assert.assertEquals("Unexpected second column.", "bar", partition.getSd().getCols().get(1).getName()); Assert.assertEquals("Unexpected third column.", "goo", partition.getSd().getCols().get(2).getName()); String partitionLocation = partition.getSd().getLocation(); String tableLocation = table.getSd().getLocation(); Assert.assertTrue("Unexpected partition location: " + partitionLocation + ". " + "Partition should have been within table location: " + tableLocation, partitionLocation.startsWith(tableLocation)); } } catch (Throwable t) { LOG.error("Unexpected Exception!", t); t.printStackTrace(); Assert.assertTrue("Unexpected Exception!", false); } } }