/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.metadata; import static org.junit.Assert.*; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.thrift.TException; import org.junit.After; import org.junit.Before; import org.junit.Test; import com.google.common.collect.Lists; /** * TestHiveMetaStoreChecker. * */ public class TestHiveMetaStoreChecker { private Hive hive; private FileSystem fs; private HiveMetaStoreChecker checker = null; private final String dbName = "testhivemetastorechecker_db"; private final String tableName = "testhivemetastorechecker_table"; private final String partDateName = "partdate"; private final String partCityName = "partcity"; private List<FieldSchema> partCols; private List<Map<String, String>> parts; @Before public void setUp() throws Exception { hive = Hive.get(); hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 15); hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "throw"); checker = new HiveMetaStoreChecker(hive); partCols = new ArrayList<FieldSchema>(); partCols.add(new FieldSchema(partDateName, serdeConstants.STRING_TYPE_NAME, "")); partCols.add(new FieldSchema(partCityName, serdeConstants.STRING_TYPE_NAME, "")); parts = new ArrayList<Map<String, String>>(); Map<String, String> part1 = new HashMap<String, String>(); part1.put(partDateName, "2008-01-01"); part1.put(partCityName, "london"); parts.add(part1); Map<String, String> part2 = new HashMap<String, String>(); part2.put(partDateName, "2008-01-02"); part2.put(partCityName, "stockholm"); parts.add(part2); //cleanup just in case something is left over from previous run dropDbTable(); } private void dropDbTable() { // cleanup try { hive.dropTable(dbName, tableName, true, true); hive.dropDatabase(dbName, true, true, true); } catch (NoSuchObjectException e) { // ignore } catch (HiveException e) { // ignore } } @After public void tearDown() throws Exception { dropDbTable(); Hive.closeCurrent(); } @Test public void testTableCheck() throws HiveException, MetaException, IOException, TException, AlreadyExistsException { CheckResult result = new CheckResult(); checker.checkMetastore(dbName, null, null, result); // we haven't added anything so should return an all ok assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs()); // check table only, should not exist in ms result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); assertEquals(1, result.getTablesNotInMs().size()); assertEquals(tableName, result.getTablesNotInMs().iterator().next()); assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs()); Database db = new Database(); db.setName(dbName); hive.createDatabase(db); Table table = new Table(dbName, tableName); table.setDbName(dbName); table.setInputFormatClass(TextInputFormat.class); table.setOutputFormatClass(HiveIgnoreKeyTextOutputFormat.class); hive.createTable(table); // now we've got a table, check that it works // first check all (1) tables result = new CheckResult(); checker.checkMetastore(dbName, null, null, result); assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs()); // then let's check the one we know about result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs()); // remove the table folder fs = table.getPath().getFileSystem(hive.getConf()); fs.delete(table.getPath(), true); // now this shouldn't find the path on the fs result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs());; assertEquals(1, result.getTablesNotOnFs().size()); assertEquals(tableName, result.getTablesNotOnFs().iterator().next()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs()); // put it back and one additional table fs.mkdirs(table.getPath()); Path fakeTable = table.getPath().getParent().suffix( Path.SEPARATOR + "faketable"); fs.mkdirs(fakeTable); fs.deleteOnExit(fakeTable); // find the extra table result = new CheckResult(); checker.checkMetastore(dbName, null, null, result); assertEquals(1, result.getTablesNotInMs().size()); assertEquals(fakeTable.getName(), Lists.newArrayList(result.getTablesNotInMs()).get(0)); assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs()); // create a new external table hive.dropTable(dbName, tableName); table.setProperty("EXTERNAL", "TRUE"); hive.createTable(table); // should return all ok result = new CheckResult(); checker.checkMetastore(dbName, null, null, result); assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs()); } /* * Tests the case when tblPath/p1=a/p2=b/p3=c/file for a table with partition (p1, p2) * does not throw HiveException */ @Test public void testAdditionalPartitionDirs() throws HiveException, AlreadyExistsException, IOException { Table table = createTestTable(); List<Partition> partitions = hive.getPartitions(table); assertEquals(2, partitions.size()); // add a fake partition dir on fs fs = partitions.get(0).getDataLocation().getFileSystem(hive.getConf()); Path fakePart = new Path(table.getDataLocation().toString(), partDateName + "=2017-01-01/" + partCityName + "=paloalto/fakePartCol=fakepartValue"); fs.mkdirs(fakePart); fs.deleteOnExit(fakePart); CheckResult result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String> emptySet(), result.getPartitionsNotOnFs()); //fakePart path partition is added since the defined partition keys are valid assertEquals(1, result.getPartitionsNotInMs().size()); } @Test(expected = HiveException.class) public void testInvalidPartitionKeyName() throws HiveException, AlreadyExistsException, IOException { Table table = createTestTable(); List<Partition> partitions = hive.getPartitions(table); assertEquals(2, partitions.size()); // add a fake partition dir on fs fs = partitions.get(0).getDataLocation().getFileSystem(hive.getConf()); Path fakePart = new Path(table.getDataLocation().toString(), "fakedate=2009-01-01/fakecity=sanjose"); fs.mkdirs(fakePart); fs.deleteOnExit(fakePart); checker.checkMetastore(dbName, tableName, null, new CheckResult()); } /* * skip mode should not throw exception when a invalid partition directory * is found. It should just ignore it */ @Test public void testSkipInvalidPartitionKeyName() throws HiveException, AlreadyExistsException, IOException { hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "skip"); checker = new HiveMetaStoreChecker(hive); Table table = createTestTable(); List<Partition> partitions = hive.getPartitions(table); assertEquals(2, partitions.size()); // add a fake partition dir on fs fs = partitions.get(0).getDataLocation().getFileSystem(hive.getConf()); Path fakePart = new Path(table.getDataLocation().toString(), "fakedate=2009-01-01/fakecity=sanjose"); fs.mkdirs(fakePart); fs.deleteOnExit(fakePart); createPartitionsDirectoriesOnFS(table, 2); CheckResult result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String> emptySet(), result.getPartitionsNotOnFs()); // only 2 valid partitions should be added assertEquals(2, result.getPartitionsNotInMs().size()); } private Table createTestTable() throws AlreadyExistsException, HiveException { Database db = new Database(); db.setName(dbName); hive.createDatabase(db); Table table = new Table(dbName, tableName); table.setDbName(dbName); table.setInputFormatClass(TextInputFormat.class); table.setOutputFormatClass(HiveIgnoreKeyTextOutputFormat.class); table.setPartCols(partCols); hive.createTable(table); table = hive.getTable(dbName, tableName); for (Map<String, String> partSpec : parts) { hive.createPartition(table, partSpec); } return table; } @Test public void testPartitionsCheck() throws HiveException, MetaException, IOException, TException, AlreadyExistsException { Table table = createTestTable(); CheckResult result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); // all is well assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs()); List<Partition> partitions = hive.getPartitions(table); assertEquals(2, partitions.size()); Partition partToRemove = partitions.get(0); // As this partition (partdate=2008-01-01/partcity=london) is the only // partition under (partdate=2008-01-01) // we also need to delete partdate=2008-01-01 to make it consistent. Path partToRemovePath = partToRemove.getDataLocation().getParent(); fs = partToRemovePath.getFileSystem(hive.getConf()); fs.delete(partToRemovePath, true); result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); // missing one partition on fs assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs()); assertEquals(1, result.getPartitionsNotOnFs().size()); assertEquals(partToRemove.getName(), result.getPartitionsNotOnFs().iterator().next() .getPartitionName()); assertEquals(partToRemove.getTable().getTableName(), result.getPartitionsNotOnFs().iterator().next().getTableName()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs()); List<Map<String, String>> partsCopy = new ArrayList<Map<String, String>>(); partsCopy.add(partitions.get(1).getSpec()); // check only the partition that exists, all should be well result = new CheckResult(); checker.checkMetastore(dbName, tableName, partsCopy, result); assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs()); // old test is moved to msck_repair_2.q // cleanup hive.dropTable(dbName, tableName, true, true); hive.createTable(table); result = new CheckResult(); checker.checkMetastore(dbName, null, null, result); assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs()); //--0e System.err.println("Test completed - partition check"); } @Test public void testDataDeletion() throws HiveException, MetaException, IOException, TException, AlreadyExistsException, NoSuchObjectException { Database db = new Database(); db.setName(dbName); hive.createDatabase(db); Table table = new Table(dbName, tableName); table.setDbName(dbName); table.setInputFormatClass(TextInputFormat.class); table.setOutputFormatClass(HiveIgnoreKeyTextOutputFormat.class); table.setPartCols(partCols); hive.createTable(table); table = hive.getTable(dbName, tableName); Path fakeTable = table.getPath().getParent().suffix( Path.SEPARATOR + "faketable"); fs = fakeTable.getFileSystem(hive.getConf()); fs.mkdirs(fakeTable); fs.deleteOnExit(fakeTable); Path fakePart = new Path(table.getDataLocation().toString(), "fakepartition=fakevalue"); fs.mkdirs(fakePart); fs.deleteOnExit(fakePart); hive.dropTable(dbName, tableName, true, true); assertFalse(fs.exists(fakePart)); hive.dropDatabase(dbName); assertFalse(fs.exists(fakeTable)); } /* * Test multi-threaded implementation of checker to find out missing partitions */ @Test public void testPartitionsNotInMs() throws HiveException, AlreadyExistsException, IOException { Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); CheckResult result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs()); assertEquals(10, result.getPartitionsNotInMs().size()); } /* * Tests single threaded implementation of checkMetastore */ @Test public void testSingleThreadedCheckMetastore() throws HiveException, AlreadyExistsException, IOException { // set num of threads to 0 so that single-threaded checkMetastore is called hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 0); Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); CheckResult result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String> emptySet(), result.getPartitionsNotOnFs()); assertEquals(10, result.getPartitionsNotInMs().size()); } /** * Tests single threaded implementation for deeply nested partitioned tables * * @throws HiveException * @throws AlreadyExistsException * @throws IOException */ @Test public void testSingleThreadedDeeplyNestedTables() throws HiveException, AlreadyExistsException, IOException { // set num of threads to 0 so that single-threaded checkMetastore is called hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 0); int poolSize = 2; // create a deeply nested table which has more partition keys than the pool size Table testTable = createPartitionedTestTable(dbName, tableName, poolSize + 2, 0); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); CheckResult result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String> emptySet(), result.getPartitionsNotOnFs()); assertEquals(10, result.getPartitionsNotInMs().size()); } /** * Tests the case when the number of partition keys are more than the threadpool size. * * @throws HiveException * @throws AlreadyExistsException * @throws IOException */ @Test public void testDeeplyNestedPartitionedTables() throws HiveException, AlreadyExistsException, IOException { hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 2); int poolSize = 2; // create a deeply nested table which has more partition keys than the pool size Table testTable = createPartitionedTestTable(dbName, tableName, poolSize + 2, 0); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); CheckResult result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String> emptySet(), result.getPartitionsNotOnFs()); assertEquals(10, result.getPartitionsNotInMs().size()); } /** * Test if checker throws HiveException when the there is a dummy directory present in the nested level * of sub-directories * @throws AlreadyExistsException * @throws IOException * @throws HiveException */ @Test public void testErrorForMissingPartitionColumn() throws AlreadyExistsException, IOException, HiveException { Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); //create a fake directory to throw exception StringBuilder sb = new StringBuilder(testTable.getDataLocation().toString()); sb.append(Path.SEPARATOR); sb.append("dummyPart=error"); createDirectory(sb.toString()); //check result now CheckResult result = new CheckResult(); Exception exception = null; try { checker.checkMetastore(dbName, tableName, null, result); } catch (Exception e) { exception = e; } assertTrue("Expected HiveException", exception!=null && exception instanceof HiveException); createFile(sb.toString(), "dummyFile"); result = new CheckResult(); exception = null; try { checker.checkMetastore(dbName, tableName, null, result); } catch (Exception e) { exception = e; } assertTrue("Expected HiveException", exception!=null && exception instanceof HiveException); } /** * Tests if there exists a unknown partition directory on the FS with in-valid order of partition * keys than what is specified in table specification. * * @throws AlreadyExistsException * @throws HiveException * @throws IOException */ @Test(expected = HiveException.class) public void testInvalidOrderForPartitionKeysOnFS() throws AlreadyExistsException, HiveException, IOException { Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); // add 10 partitions on the filesystem createInvalidPartitionDirsOnFS(testTable, 10); CheckResult result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); } /* * In skip mode msck should ignore invalid partitions instead of * throwing exception */ @Test public void testSkipInvalidOrderForPartitionKeysOnFS() throws AlreadyExistsException, HiveException, IOException { hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "skip"); checker = new HiveMetaStoreChecker(hive); Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); // add 10 partitions on the filesystem createInvalidPartitionDirsOnFS(testTable, 2); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 2); CheckResult result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); assertEquals(Collections.<String> emptySet(), result.getTablesNotInMs()); assertEquals(Collections.<String> emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.<String> emptySet(), result.getPartitionsNotOnFs()); // only 2 valid partitions should be added assertEquals(2, result.getPartitionsNotInMs().size()); } /* * Test if single-threaded implementation checker throws HiveException when the there is a dummy * directory present in the nested level */ @Test public void testErrorForMissingPartitionsSingleThreaded() throws AlreadyExistsException, HiveException, IOException { // set num of threads to 0 so that single-threaded checkMetastore is called hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 0); Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); // create a fake directory to throw exception StringBuilder sb = new StringBuilder(testTable.getDataLocation().toString()); sb.append(Path.SEPARATOR); sb.append("dummyPart=error"); createDirectory(sb.toString()); // check result now CheckResult result = new CheckResult(); Exception exception = null; try { checker.checkMetastore(dbName, tableName, null, result); } catch (Exception e) { exception = e; } assertTrue("Expected HiveException", exception!=null && exception instanceof HiveException); createFile(sb.toString(), "dummyFile"); result = new CheckResult(); exception = null; try { checker.checkMetastore(dbName, tableName, null, result); } catch (Exception e) { exception = e; } assertTrue("Expected HiveException", exception!=null && exception instanceof HiveException); } /** * Creates a test partitioned table with the required level of nested partitions and number of * partitions * * @param dbName - Database name * @param tableName - Table name * @param numOfPartKeys - Number of partition keys (nested levels of sub-directories in base table * path) * @param valuesPerPartition - If greater than 0 creates valuesPerPartition dummy partitions * @return * @throws AlreadyExistsException * @throws HiveException */ private Table createPartitionedTestTable(String dbName, String tableName, int numOfPartKeys, int valuesPerPartition) throws AlreadyExistsException, HiveException { Database db = new Database(); db.setName(dbName); hive.createDatabase(db); Table table = new Table(dbName, tableName); table.setDbName(dbName); table.setInputFormatClass(TextInputFormat.class); table.setOutputFormatClass(HiveIgnoreKeyTextOutputFormat.class); // create partition key schema ArrayList<FieldSchema> partKeys = new ArrayList<FieldSchema>(); for (int i = 1; i <= numOfPartKeys; i++) { String partName = "part" + String.valueOf(i); partKeys.add(new FieldSchema(partName, serdeConstants.STRING_TYPE_NAME, "")); } table.setPartCols(partKeys); // create table hive.createTable(table); table = hive.getTable(dbName, tableName); if (valuesPerPartition == 0) { return table; } // create partition specs ArrayList<Map<String, String>> partitionSpecs = new ArrayList<Map<String, String>>(); for (int partKeyIndex = 0; partKeyIndex < numOfPartKeys; partKeyIndex++) { String partName = partKeys.get(partKeyIndex).getName(); Map<String, String> partMap = new HashMap<>(); for (int val = 1; val <= valuesPerPartition; val++) { partMap.put(partName, String.valueOf(val)); } partitionSpecs.add(partMap); } // create partitions for (Map<String, String> partSpec : partitionSpecs) { hive.createPartition(table, partSpec); } List<Partition> partitions = hive.getPartitions(table); assertEquals(numOfPartKeys * valuesPerPartition, partitions.size()); return table; } /** * Creates partition sub-directories for a given table on the file system. Used to test the * use-cases when partitions for the table are not present in the metastore db * * @param table - Table which provides the base locations and partition specs for creating the * sub-directories * @param numPartitions - Number of partitions to be created * @param reverseOrder - If set to true creates the partition sub-directories in the reverse order * of specified by partition keys defined for the table * @throws IOException */ private void createPartitionsDirectoriesOnFS(Table table, int numPartitions, boolean reverseOrder) throws IOException { String path = table.getDataLocation().toString(); fs = table.getPath().getFileSystem(hive.getConf()); int numPartKeys = table.getPartitionKeys().size(); for (int i = 0; i < numPartitions; i++) { StringBuilder partPath = new StringBuilder(path); partPath.append(Path.SEPARATOR); if (!reverseOrder) { for (int j = 0; j < numPartKeys; j++) { FieldSchema field = table.getPartitionKeys().get(j); partPath.append(field.getName()); partPath.append('='); partPath.append("val_"); partPath.append(i); if (j < (numPartKeys - 1)) { partPath.append(Path.SEPARATOR); } } } else { for (int j = numPartKeys - 1; j >= 0; j--) { FieldSchema field = table.getPartitionKeys().get(j); partPath.append(field.getName()); partPath.append('='); partPath.append("val_"); partPath.append(i); if (j > 0) { partPath.append(Path.SEPARATOR); } } } createDirectory(partPath.toString()); } } private void createPartitionsDirectoriesOnFS(Table table, int numPartitions) throws IOException { createPartitionsDirectoriesOnFS(table, numPartitions, false); } /** * Creates a partition directory structure on file system but with a reverse order * of sub-directories compared to the partition keys defined in the table. Eg. if the * partition keys defined in table are (a int, b int, c int) this method will create * an invalid directory c=val_1/b=val_1/a=val_1 * @param table * @throws IOException */ private void createInvalidPartitionDirsOnFS(Table table, int numPartitions) throws IOException { createPartitionsDirectoriesOnFS(table, numPartitions, true); } private void createFile(String partPath, String filename) throws IOException { Path part = new Path(partPath); fs.mkdirs(part); fs.createNewFile(new Path(partPath + Path.SEPARATOR + filename)); fs.deleteOnExit(part); } private void createDirectory(String partPath) throws IOException { Path part = new Path(partPath); fs.mkdirs(part); fs.deleteOnExit(part); } }