/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hive.hcatalog.api;

import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;

import javax.annotation.Nullable;

import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NotificationEvent;
import org.apache.hadoop.hive.metastore.api.PartitionEventType;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.util.Shell;
import org.apache.hive.hcatalog.NoExitSecurityManager;
import org.apache.hive.hcatalog.api.repl.Command;
import org.apache.hive.hcatalog.api.repl.ReplicationTask;
import org.apache.hive.hcatalog.api.repl.ReplicationUtils;
import org.apache.hive.hcatalog.api.repl.StagingDirectoryProvider;
import org.apache.hive.hcatalog.api.repl.exim.EximReplicationTaskFactory;
import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;
import org.apache.hive.hcatalog.listener.DbNotificationListener;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

public class TestHCatClient {
  private static final Logger LOG =
      LoggerFactory.getLogger(TestHCatClient.class);
  private static int msPort;
  private static HiveConf hcatConf;
  private static boolean isReplicationTargetHCatRunning = false;
  private static int replicationTargetHCatPort;
  private static HiveConf replicationTargetHCatConf;
  private static SecurityManager securityManager;
  private static boolean useExternalMS = false;

  @AfterClass
  public static void tearDown() throws Exception {
    if (!useExternalMS) {
      LOG.info("Shutting down metastore.");
      System.setSecurityManager(securityManager);
    }
  }

  @BeforeClass
  public static void startMetaStoreServer() throws Exception {
    hcatConf = new HiveConf(TestHCatClient.class);
    String metastoreUri = System.getProperty("test." + HiveConf.ConfVars.METASTOREURIS.varname);
    if (metastoreUri != null) {
      hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, metastoreUri);
      useExternalMS = true;
      return;
    }

    // Turn on the db notification listener on the metastore.
    System.setProperty(HiveConf.ConfVars.METASTORE_EVENT_LISTENERS.varname,
        DbNotificationListener.class.getName());
    msPort = MetaStoreUtils.startMetaStore();
    securityManager = System.getSecurityManager();
    System.setSecurityManager(new NoExitSecurityManager());
    hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort);
    hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
    hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname,
        HCatSemanticAnalyzer.class.getName());
    hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " ");
    System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " ");
  }

  public static HiveConf getConf() {
    return hcatConf;
  }

  public static String fixPath(String path) {
    String expectedDir = path.replaceAll("\\\\", "/");
    if (!expectedDir.startsWith("/")) {
      expectedDir = "/" + expectedDir;
    }
    return expectedDir;
  }

  public static String makePartLocation(HCatTable table, Map<String, String> partitionSpec)
      throws MetaException {
    return (new Path(table.getSd().getLocation(),
        Warehouse.makePartPath(partitionSpec))).toUri().toString();
  }

  @Test
  public void testBasicDDLCommands() throws Exception {
    String db = "testdb";
    String tableOne = "testTable1";
    String tableTwo = "testTable2";
    String tableThree = "testTable3";
    HCatClient client = HCatClient.create(new Configuration(hcatConf));
    client.dropDatabase(db, true, HCatClient.DropDBMode.CASCADE);

    HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(db).ifNotExists(false).build();
    client.createDatabase(dbDesc);
    List<String> dbNames = client.listDatabaseNamesByPattern("*");
    assertTrue(dbNames.contains("default"));
    assertTrue(dbNames.contains(db));

    HCatDatabase testDb = client.getDatabase(db);
    assertTrue(testDb.getComment() == null);
    assertTrue(testDb.getProperties().size() == 0);
    String warehouseDir = System.getProperty("test.warehouse.dir", "/user/hive/warehouse");
    if (useExternalMS) {
      assertTrue(testDb.getLocation().matches(".*" + "/" + db + ".db"));
    } else {
      String expectedDir = warehouseDir.replaceFirst("pfile:///", "pfile:/");
      assertEquals(expectedDir + "/" + db + ".db", testDb.getLocation());
    }

    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(new HCatFieldSchema("id", Type.INT, "id comment"));
    cols.add(new HCatFieldSchema("value", Type.STRING, "value comment"));
    HCatCreateTableDesc tableDesc = HCatCreateTableDesc
        .create(db, tableOne, cols).fileFormat("rcfile").build();
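    // Creating the table with fileFormat("rcfile") should bind it to the RCFile
    // input/output formats and the LazyBinaryColumnarSerDe, as asserted below.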
    client.createTable(tableDesc);
    HCatTable table1 = client.getTable(db, tableOne);
    assertTrue(table1.getInputFileFormat().equalsIgnoreCase(
        RCFileInputFormat.class.getName()));
    assertTrue(table1.getOutputFileFormat().equalsIgnoreCase(
        RCFileOutputFormat.class.getName()));
    assertTrue(table1.getSerdeLib().equalsIgnoreCase(
        LazyBinaryColumnarSerDe.class.getName()));
    assertTrue(table1.getCols().equals(cols));

    // Since "ifNotExists" was not set to true, trying to create the same
    // table again will result in an exception.
    try {
      client.createTable(tableDesc);
      fail("Expected exception");
    } catch (HCatException e) {
      assertTrue(e.getMessage().contains(
          "AlreadyExistsException while creating table."));
    }

    client.dropTable(db, tableOne, true);
    HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc.create(db,
        tableTwo, cols).fieldsTerminatedBy('\001').escapeChar('\002')
        .linesTerminatedBy('\003').mapKeysTerminatedBy('\004')
        .collectionItemsTerminatedBy('\005').nullDefinedAs('\006').build();
    client.createTable(tableDesc2);
    HCatTable table2 = client.getTable(db, tableTwo);
    assertTrue("Expected TextInputFormat, but got: " + table2.getInputFileFormat(),
        table2.getInputFileFormat().equalsIgnoreCase(TextInputFormat.class.getName()));
    assertTrue(table2.getOutputFileFormat().equalsIgnoreCase(
        HiveIgnoreKeyTextOutputFormat.class.getName()));
    assertTrue("SerdeParams not found", table2.getSerdeParams() != null);
    assertEquals("checking " + serdeConstants.FIELD_DELIM, Character.toString('\001'),
        table2.getSerdeParams().get(serdeConstants.FIELD_DELIM));
    assertEquals("checking " + serdeConstants.ESCAPE_CHAR, Character.toString('\002'),
        table2.getSerdeParams().get(serdeConstants.ESCAPE_CHAR));
    assertEquals("checking " + serdeConstants.LINE_DELIM, Character.toString('\003'),
        table2.getSerdeParams().get(serdeConstants.LINE_DELIM));
    assertEquals("checking " + serdeConstants.MAPKEY_DELIM, Character.toString('\004'),
        table2.getSerdeParams().get(serdeConstants.MAPKEY_DELIM));
    assertEquals("checking " + serdeConstants.COLLECTION_DELIM, Character.toString('\005'),
        table2.getSerdeParams().get(serdeConstants.COLLECTION_DELIM));
    assertEquals("checking " + serdeConstants.SERIALIZATION_NULL_FORMAT, Character.toString('\006'),
        table2.getSerdeParams().get(serdeConstants.SERIALIZATION_NULL_FORMAT));
    assertTrue(table2.getLocation().toLowerCase().matches(
        ".*" + ("/" + db + ".db/" + tableTwo).toLowerCase()));

    HCatCreateTableDesc tableDesc3 = HCatCreateTableDesc.create(db,
        tableThree, cols).fileFormat("orcfile").build();
    client.createTable(tableDesc3);
    HCatTable table3 = client.getTable(db, tableThree);
    assertTrue(table3.getInputFileFormat().equalsIgnoreCase(
        OrcInputFormat.class.getName()));
    assertTrue(table3.getOutputFileFormat().equalsIgnoreCase(
        OrcOutputFormat.class.getName()));
    assertTrue(table3.getSerdeLib().equalsIgnoreCase(
        OrcSerde.class.getName()));
    // Check table3's columns here; the original asserted on table1, whose
    // table had already been dropped. table3 is the table under test.
    assertTrue(table3.getCols().equals(cols));
    client.close();
  }

  /**
   * Tests that a plain table instantiation matches what Hive says an
   * empty table create should look like.
   * @throws Exception
   */
  @Test
  public void testEmptyTableInstantiation() throws Exception {
    HCatClient client = HCatClient.create(new Configuration(hcatConf));

    String dbName = "default";
    String tblName = "testEmptyCreate";
    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(new HCatFieldSchema("id", Type.INT, "id comment"));
    cols.add(new HCatFieldSchema("value", Type.STRING, "value comment"));

    client.dropTable(dbName, tblName, true);
    // Create a minimalistic table.
    client.createTable(HCatCreateTableDesc
        .create(new HCatTable(dbName, tblName).cols(cols), false)
        .build());
    HCatTable tCreated = client.getTable(dbName, tblName);

    org.apache.hadoop.hive.metastore.api.Table emptyTable = Table.getEmptyTable(dbName, tblName);

    Map<String, String> createdProps = tCreated.getTblProps();
    Map<String, String> emptyProps = emptyTable.getParameters();
    mapEqualsContainedIn(emptyProps, createdProps);

    // Test sd params - we check that all the parameters in an empty table
    // are retained as-is. We may add beyond it, but not change values for
    // any parameters that hive defines for an empty table.
    Map<String, String> createdSdParams = tCreated.getSerdeParams();
    Map<String, String> emptySdParams = emptyTable.getSd().getSerdeInfo().getParameters();
    mapEqualsContainedIn(emptySdParams, createdSdParams);
  }

  /**
   * Verifies that an inner map is present inside an outer map, with
   * all values being equal.
   */
  private void mapEqualsContainedIn(Map<String, String> inner, Map<String, String> outer) {
    assertNotNull(inner);
    assertNotNull(outer);
    for (Map.Entry<String, String> e : inner.entrySet()) {
      assertTrue(outer.containsKey(e.getKey()));
      assertEquals(outer.get(e.getKey()), e.getValue());
    }
  }

  @Test
  public void testPartitionsHCatClientImpl() throws Exception {
    HCatClient client = HCatClient.create(new Configuration(hcatConf));
    String dbName = "ptnDB";
    String tableName = "pageView";
    client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);

    HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName).ifNotExists(true).build();
    client.createDatabase(dbDesc);
    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(new HCatFieldSchema("userid", Type.INT, "id columns"));
    cols.add(new HCatFieldSchema("viewtime", Type.BIGINT, "view time columns"));
    cols.add(new HCatFieldSchema("pageurl", Type.STRING, ""));
    cols.add(new HCatFieldSchema("ip", Type.STRING, "IP Address of the User"));

    ArrayList<HCatFieldSchema> ptnCols = new ArrayList<HCatFieldSchema>();
    ptnCols.add(new HCatFieldSchema("dt", Type.STRING, "date column"));
    ptnCols.add(new HCatFieldSchema("country", Type.STRING, "country column"));
    HCatTable table = new HCatTable(dbName, tableName).cols(cols)
        .partCols(ptnCols)
        .fileFormat("sequenceFile");
    HCatCreateTableDesc tableDesc = HCatCreateTableDesc.create(table, false).build();
    client.createTable(tableDesc);

    // Verify that the table is created successfully.
    table = client.getTable(dbName, tableName);
    Map<String, String> firstPtn = new HashMap<String, String>();
    firstPtn.put("dt", "04/30/2012");
    firstPtn.put("country", "usa");
    // Test the new HCatAddPartitionDesc API.
    HCatAddPartitionDesc addPtn = HCatAddPartitionDesc.create(
        new HCatPartition(table, firstPtn, null)).build();
    client.addPartition(addPtn);

    Map<String, String> secondPtn = new HashMap<String, String>();
    secondPtn.put("dt", "04/12/2012");
    secondPtn.put("country", "brazil");
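    // Note: the newer HCatAddPartitionDesc.create(HCatPartition) API (used above)
    // derives everything from the partition object; the deprecated overload used
    // below takes the db/table names and the partition spec directly.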
    // Test the deprecated HCatAddPartitionDesc API.
    HCatAddPartitionDesc addPtn2 = HCatAddPartitionDesc.create(dbName,
        tableName, null, secondPtn).build();
    client.addPartition(addPtn2);

    Map<String, String> thirdPtn = new HashMap<String, String>();
    thirdPtn.put("dt", "04/13/2012");
    thirdPtn.put("country", "argentina");
    // Test the deprecated HCatAddPartitionDesc API.
    HCatAddPartitionDesc addPtn3 = HCatAddPartitionDesc.create(dbName,
        tableName, null, thirdPtn).build();
    client.addPartition(addPtn3);

    List<HCatPartition> ptnList = client.listPartitionsByFilter(dbName, tableName, null);
    assertTrue(ptnList.size() == 3);

    HCatPartition ptn = client.getPartition(dbName, tableName, firstPtn);
    assertTrue(ptn != null);

    client.dropPartitions(dbName, tableName, firstPtn, true);
    ptnList = client.listPartitionsByFilter(dbName, tableName, null);
    assertTrue(ptnList.size() == 2);

    List<HCatPartition> ptnListTwo = client.listPartitionsByFilter(dbName,
        tableName, "country = \"argentina\"");
    assertTrue(ptnListTwo.size() == 1);

    client.markPartitionForEvent(dbName, tableName, thirdPtn,
        PartitionEventType.LOAD_DONE);
    boolean isMarked = client.isPartitionMarkedForEvent(dbName, tableName,
        thirdPtn, PartitionEventType.LOAD_DONE);
    assertTrue(isMarked);
    client.close();
  }

  @Test
  public void testDatabaseLocation() throws Exception {
    HCatClient client = HCatClient.create(new Configuration(hcatConf));
    String dbName = "locationDB";
    client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);

    HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName)
        .ifNotExists(true).location("/tmp/" + dbName).build();
    client.createDatabase(dbDesc);
    HCatDatabase newDB = client.getDatabase(dbName);
    assertTrue(newDB.getLocation().matches(".*/tmp/" + dbName));
    client.close();
  }

  @Test
  public void testCreateTableLike() throws Exception {
    HCatClient client = HCatClient.create(new Configuration(hcatConf));
    String tableName = "tableone";
    String cloneTable = "tabletwo";
    client.dropTable(null, tableName, true);
    client.dropTable(null, cloneTable, true);

    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(new HCatFieldSchema("id", Type.INT, "id columns"));
    cols.add(new HCatFieldSchema("value", Type.STRING, "id columns"));
    HCatCreateTableDesc tableDesc = HCatCreateTableDesc
        .create(null, tableName, cols).fileFormat("rcfile").build();
    client.createTable(tableDesc);
    // Create a new table similar to the previous one.
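    // Clone "tableone"'s definition into "tabletwo" in the default database
    // (ifNotExists=true, isExternal=false, default location).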
    client.createTableLike(null, tableName, cloneTable, true, false, null);
    List<String> tables = client.listTableNamesByPattern(null, "table*");
    assertTrue(tables.size() == 2);
    client.close();
  }

  @Test
  public void testRenameTable() throws Exception {
    HCatClient client = HCatClient.create(new Configuration(hcatConf));
    String tableName = "temptable";
    String newName = "mytable";
    client.dropTable(null, tableName, true);
    client.dropTable(null, newName, true);

    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(new HCatFieldSchema("id", Type.INT, "id columns"));
    cols.add(new HCatFieldSchema("value", Type.STRING, "id columns"));
    HCatCreateTableDesc tableDesc = HCatCreateTableDesc
        .create(null, tableName, cols).fileFormat("rcfile").build();
    client.createTable(tableDesc);
    client.renameTable(null, tableName, newName);
    try {
      client.getTable(null, tableName);
      fail("Fetching the table under its old name should have thrown HCatException.");
    } catch (HCatException exp) {
      assertTrue("Unexpected exception message: " + exp.getMessage(),
          exp.getMessage().contains("NoSuchObjectException while fetching table"));
    }
    HCatTable newTable = client.getTable(null, newName);
    assertTrue(newTable != null);
    assertTrue(newTable.getTableName().equals(newName));
    client.close();
  }

  @Test
  public void testTransportFailure() throws Exception {
    HCatClient client = HCatClient.create(new Configuration(hcatConf));
    boolean isExceptionCaught = false;
    // Table creation with a long table name causes ConnectionFailureException.
    final String tableName = "Temptable" + new BigInteger(260, new Random()).toString(2);

    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(new HCatFieldSchema("id", Type.INT, "id columns"));
    cols.add(new HCatFieldSchema("value", Type.STRING, "id columns"));
    try {
      HCatCreateTableDesc tableDesc = HCatCreateTableDesc
          .create(null, tableName, cols).fileFormat("rcfile").build();
      client.createTable(tableDesc);
    } catch (Exception exp) {
      isExceptionCaught = true;
      assertEquals("Unexpected exception type.", HCatException.class, exp.getClass());
      // The connection was closed, so create a new one.
      client = HCatClient.create(new Configuration(hcatConf));
      String newName = "goodTable";
      client.dropTable(null, newName, true);
      HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc
          .create(null, newName, cols).fileFormat("rcfile").build();
      client.createTable(tableDesc2);
      HCatTable newTable = client.getTable(null, newName);
      assertTrue(newTable != null);
      assertTrue(newTable.getTableName().equalsIgnoreCase(newName));
    } finally {
      client.close();
      assertTrue("The expected exception was never thrown.", isExceptionCaught);
    }
  }

  @Test
  public void testOtherFailure() throws Exception {
    HCatClient client = HCatClient.create(new Configuration(hcatConf));
    String tableName = "Temptable";
    boolean isExceptionCaught = false;
    client.dropTable(null, tableName, true);

    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(new HCatFieldSchema("id", Type.INT, "id columns"));
    cols.add(new HCatFieldSchema("value", Type.STRING, "id columns"));
    try {
      HCatCreateTableDesc tableDesc = HCatCreateTableDesc
          .create(null, tableName, cols).fileFormat("rcfile").build();
      client.createTable(tableDesc);
      // The DB "foo" is non-existent, so the getTable() below must fail.
client.getTable("foo", tableName); } catch (Exception exp) { isExceptionCaught = true; assertTrue(exp instanceof HCatException); String newName = "goodTable"; client.dropTable(null, newName, true); HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc .create(null, newName, cols).fileFormat("rcfile").build(); client.createTable(tableDesc2); HCatTable newTable = client.getTable(null, newName); assertTrue(newTable != null); assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); } finally { client.close(); assertTrue("The expected exception was never thrown.", isExceptionCaught); } } @Test public void testDropTableException() throws Exception { HCatClient client = HCatClient.create(new Configuration(hcatConf)); String tableName = "tableToBeDropped"; boolean isExceptionCaught = false; client.dropTable(null, tableName, true); try { client.dropTable(null, tableName, false); } catch (Exception exp) { isExceptionCaught = true; assertTrue(exp instanceof HCatException); LOG.info("Drop Table Exception: " + exp.getCause()); } finally { client.close(); assertTrue("The expected exception was never thrown.", isExceptionCaught); } } @Test public void testUpdateTableSchema() throws Exception { try { HCatClient client = HCatClient.create(new Configuration(hcatConf)); final String dbName = "testUpdateTableSchema_DBName"; final String tableName = "testUpdateTableSchema_TableName"; client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); client.createDatabase(HCatCreateDBDesc.create(dbName).build()); List<HCatFieldSchema> oldSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), new HCatFieldSchema("bar", Type.STRING, "")); client.createTable(HCatCreateTableDesc.create(dbName, tableName, oldSchema).build()); List<HCatFieldSchema> newSchema = Arrays.asList(new HCatFieldSchema("completely", Type.DOUBLE, ""), new HCatFieldSchema("new", Type.STRING, ""), new HCatFieldSchema("fields", Type.STRING, "")); client.updateTableSchema(dbName, tableName, newSchema); assertArrayEquals(newSchema.toArray(), client.getTable(dbName, tableName).getCols().toArray()); client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); } catch (Exception exception) { LOG.error("Unexpected exception.", exception); assertTrue("Unexpected exception: " + exception.getMessage(), false); } } @Test public void testObjectNotFoundException() throws Exception { try { HCatClient client = HCatClient.create(new Configuration(hcatConf)); String dbName = "testObjectNotFoundException_DBName"; String tableName = "testObjectNotFoundException_TableName"; client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); try { // Test that fetching a non-existent db-name yields ObjectNotFound. client.getDatabase(dbName); assertTrue("Expected ObjectNotFoundException.", false); } catch(Exception exception) { LOG.info("Got exception: ", exception); assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), exception instanceof ObjectNotFoundException); } client.createDatabase(HCatCreateDBDesc.create(dbName).build()); try { // Test that fetching a non-existent table-name yields ObjectNotFound. client.getTable(dbName, tableName); assertTrue("Expected ObjectNotFoundException.", false); } catch(Exception exception) { LOG.info("Got exception: ", exception); assertTrue("Expected ObjectNotFoundException. 
Got:" + exception.getClass(), exception instanceof ObjectNotFoundException); } String partitionColumn = "part"; List<HCatFieldSchema> columns = Arrays.asList(new HCatFieldSchema("col", Type.STRING, "")); ArrayList<HCatFieldSchema> partitionColumns = new ArrayList<HCatFieldSchema>( Arrays.asList(new HCatFieldSchema(partitionColumn, Type.STRING, ""))); HCatTable table = new HCatTable(dbName, tableName).cols(columns).partCols(partitionColumns); client.createTable(HCatCreateTableDesc.create(table, false).build()); HCatTable createdTable = client.getTable(dbName,tableName); Map<String, String> partitionSpec = new HashMap<String, String>(); partitionSpec.put(partitionColumn, "foobar"); try { // Test that fetching a non-existent partition yields ObjectNotFound. client.getPartition(dbName, tableName, partitionSpec); assertTrue("Expected ObjectNotFoundException.", false); } catch(Exception exception) { LOG.info("Got exception: ", exception); assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), exception instanceof ObjectNotFoundException); } client.addPartition(HCatAddPartitionDesc.create(new HCatPartition(createdTable, partitionSpec, makePartLocation(createdTable,partitionSpec))).build()); // Test that listPartitionsByFilter() returns an empty-set, if the filter selects no partitions. assertEquals("Expected empty set of partitions.", 0, client.listPartitionsByFilter(dbName, tableName, partitionColumn + " < 'foobar'").size()); try { // Test that listPartitionsByFilter() throws HCatException if the partition-key is incorrect. partitionSpec.put("NonExistentKey", "foobar"); client.getPartition(dbName, tableName, partitionSpec); assertTrue("Expected HCatException.", false); } catch(Exception exception) { LOG.info("Got exception: ", exception); assertTrue("Expected HCatException. Got:" + exception.getClass(), exception instanceof HCatException); assertFalse("Did not expect ObjectNotFoundException.", exception instanceof ObjectNotFoundException); } } catch (Throwable t) { LOG.error("Unexpected exception!", t); assertTrue("Unexpected exception! 
" + t.getMessage(), false); } } @Test public void testGetMessageBusTopicName() throws Exception { try { HCatClient client = HCatClient.create(new Configuration(hcatConf)); String dbName = "testGetMessageBusTopicName_DBName"; String tableName = "testGetMessageBusTopicName_TableName"; client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); client.createDatabase(HCatCreateDBDesc.create(dbName).build()); String messageBusTopicName = "MY.topic.name"; Map<String, String> tableProperties = new HashMap<String, String>(1); tableProperties.put(HCatConstants.HCAT_MSGBUS_TOPIC_NAME, messageBusTopicName); client.createTable(HCatCreateTableDesc.create(dbName, tableName, Arrays.asList(new HCatFieldSchema("foo", Type.STRING, ""))).tblProps(tableProperties).build()); assertEquals("MessageBus topic-name doesn't match!", messageBusTopicName, client.getMessageBusTopicName(dbName, tableName)); client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); client.close(); } catch (Exception exception) { LOG.error("Unexpected exception.", exception); assertTrue("Unexpected exception:" + exception.getMessage(), false); } } @Test public void testPartitionSchema() throws Exception { try { HCatClient client = HCatClient.create(new Configuration(hcatConf)); final String dbName = "myDb"; final String tableName = "myTable"; client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); client.createDatabase(HCatCreateDBDesc.create(dbName).build()); List<HCatFieldSchema> columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), new HCatFieldSchema("bar", Type.STRING, "")); List<HCatFieldSchema> partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), new HCatFieldSchema("grid", Type.STRING, "")); client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(partitionSchema).build()); HCatTable table = client.getTable(dbName, tableName); List<HCatFieldSchema> partitionColumns = table.getPartCols(); assertArrayEquals("Didn't get expected partition-schema back from the HCatTable.", partitionSchema.toArray(), partitionColumns.toArray()); client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); } catch (Exception unexpected) { LOG.error("Unexpected exception!", unexpected); assertTrue("Unexpected exception! " + unexpected.getMessage(), false); } } @Test public void testGetPartitionsWithPartialSpec() throws Exception { try { HCatClient client = HCatClient.create(new Configuration(hcatConf)); final String dbName = "myDb"; final String tableName = "myTable"; client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); client.createDatabase(HCatCreateDBDesc.create(dbName).build()); List<HCatFieldSchema> columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), new HCatFieldSchema("bar", Type.STRING, "")); List<HCatFieldSchema> partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), new HCatFieldSchema("grid", Type.STRING, "")); HCatTable table = new HCatTable(dbName, tableName).cols(columnSchema).partCols(partitionSchema); client.createTable(HCatCreateTableDesc.create(table, false).build()); // Verify that the table was created successfully. 
      table = client.getTable(dbName, tableName);
      assertNotNull("The table created just now can't be null.", table);

      Map<String, String> partitionSpec = new HashMap<String, String>();
      // Note: the same spec map is reused and mutated for each partition below;
      // HCatPartition captures the partition values at construction time.
      partitionSpec.put("grid", "AB");
      partitionSpec.put("dt", "2011_12_31");
      client.addPartition(HCatAddPartitionDesc.create(
          new HCatPartition(table, partitionSpec,
              makePartLocation(table, partitionSpec))).build());
      partitionSpec.put("grid", "AB");
      partitionSpec.put("dt", "2012_01_01");
      client.addPartition(HCatAddPartitionDesc.create(
          new HCatPartition(table, partitionSpec,
              makePartLocation(table, partitionSpec))).build());
      partitionSpec.put("dt", "2012_01_01");
      partitionSpec.put("grid", "OB");
      client.addPartition(HCatAddPartitionDesc.create(
          new HCatPartition(table, partitionSpec,
              makePartLocation(table, partitionSpec))).build());
      partitionSpec.put("dt", "2012_01_01");
      partitionSpec.put("grid", "XB");
      client.addPartition(HCatAddPartitionDesc.create(
          new HCatPartition(table, partitionSpec,
              makePartLocation(table, partitionSpec))).build());

      Map<String, String> partialPartitionSpec = new HashMap<String, String>();
      partialPartitionSpec.put("dt", "2012_01_01");
      List<HCatPartition> partitions = client.getPartitions(dbName, tableName,
          partialPartitionSpec);
      assertEquals("Unexpected number of partitions.", 3, partitions.size());
      assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "AB"},
          partitions.get(0).getValues().toArray());
      assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "OB"},
          partitions.get(1).getValues().toArray());
      assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "XB"},
          partitions.get(2).getValues().toArray());
      client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE);
    } catch (Exception unexpected) {
      LOG.error("Unexpected exception!", unexpected);
      assertTrue("Unexpected exception! " + unexpected.getMessage(), false);
    }
  }

  @Test
  public void testDropPartitionsWithPartialSpec() throws Exception {
    try {
      HCatClient client = HCatClient.create(new Configuration(hcatConf));
      final String dbName = "myDb";
      final String tableName = "myTable";

      client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
      client.createDatabase(HCatCreateDBDesc.create(dbName).build());
      List<HCatFieldSchema> columnSchema = Arrays.asList(
          new HCatFieldSchema("foo", Type.INT, ""),
          new HCatFieldSchema("bar", Type.STRING, ""));
      List<HCatFieldSchema> partitionSchema = Arrays.asList(
          new HCatFieldSchema("dt", Type.STRING, ""),
          new HCatFieldSchema("grid", Type.STRING, ""));
      HCatTable table = new HCatTable(dbName, tableName).cols(columnSchema)
          .partCols(partitionSchema);
      client.createTable(HCatCreateTableDesc.create(table, false).build());

      // Verify that the table was created successfully.
      table = client.getTable(dbName, tableName);
      assertNotNull("Table couldn't be queried for.", table);
", table); Map<String, String> partitionSpec = new HashMap<String, String>(); partitionSpec.put("grid", "AB"); partitionSpec.put("dt", "2011_12_31"); client.addPartition(HCatAddPartitionDesc.create(new HCatPartition(table, partitionSpec, makePartLocation(table, partitionSpec))).build()); partitionSpec.put("grid", "AB"); partitionSpec.put("dt", "2012_01_01"); client.addPartition(HCatAddPartitionDesc.create(new HCatPartition(table, partitionSpec, makePartLocation(table, partitionSpec))).build()); partitionSpec.put("dt", "2012_01_01"); partitionSpec.put("grid", "OB"); client.addPartition(HCatAddPartitionDesc.create(new HCatPartition(table, partitionSpec, makePartLocation(table, partitionSpec))).build()); partitionSpec.put("dt", "2012_01_01"); partitionSpec.put("grid", "XB"); client.addPartition(HCatAddPartitionDesc.create(new HCatPartition(table, partitionSpec, makePartLocation(table, partitionSpec))).build()); Map<String, String> partialPartitionSpec = new HashMap<String, String>(); partialPartitionSpec.put("dt", "2012_01_01"); client.dropPartitions(dbName, tableName, partialPartitionSpec, true); List<HCatPartition> partitions = client.getPartitions(dbName, tableName); assertEquals("Unexpected number of partitions.", 1, partitions.size()); assertArrayEquals("Mismatched partition.", new String[]{"2011_12_31", "AB"}, partitions.get(0).getValues().toArray()); List<HCatFieldSchema> partColumns = partitions.get(0).getPartColumns(); assertEquals(2, partColumns.size()); assertEquals("dt", partColumns.get(0).getName()); assertEquals("grid", partColumns.get(1).getName()); client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); } catch (Exception unexpected) { LOG.error("Unexpected exception!", unexpected); assertTrue("Unexpected exception! " + unexpected.getMessage(), false); } } private void startReplicationTargetMetaStoreIfRequired() throws Exception { if (!isReplicationTargetHCatRunning) { HiveConf conf = new HiveConf(); conf.set("javax.jdo.option.ConnectionURL", hcatConf.get("javax.jdo.option.ConnectionURL") .replace("metastore", "target_metastore")); replicationTargetHCatPort = MetaStoreUtils.startMetaStore(conf); replicationTargetHCatConf = new HiveConf(hcatConf); replicationTargetHCatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + replicationTargetHCatPort); isReplicationTargetHCatRunning = true; } } /** * Test for event-based replication scenario * * Does not test if replication actually happened, merely tests if we're able to consume a repl task * iter appropriately, calling all the functions expected of the interface, without errors. 
   */
  @Test
  public void testReplicationTaskIter() throws Exception {
    Configuration cfg = new Configuration(hcatConf);
    // Set a really low batch size to ensure batching.
    cfg.set(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX.varname, "10");
    cfg.set(HiveConf.ConfVars.HIVE_REPL_TASK_FACTORY.varname,
        EximReplicationTaskFactory.class.getName());
    HCatClient sourceMetastore = HCatClient.create(cfg);
    String dbName = "testReplicationTaskIter";
    long baseId = sourceMetastore.getCurrentNotificationEventId();

    {
      // Perform some operations.

      // 1: Create a db after dropping if needed => 1 or 2 events.
      sourceMetastore.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
      sourceMetastore.createDatabase(HCatCreateDBDesc.create(dbName).ifNotExists(false).build());

      // 2: Create an unpartitioned table T1 => 1 event.
      String tblName1 = "T1";
      List<HCatFieldSchema> cols1 = HCatSchemaUtils.getHCatSchema("a:int,b:string").getFields();
      HCatTable table1 = (new HCatTable(dbName, tblName1)).cols(cols1);
      sourceMetastore.createTable(HCatCreateTableDesc.create(table1).build());

      // 3: Create a partitioned table T2 => 1 event.
      String tblName2 = "T2";
      List<HCatFieldSchema> cols2 = HCatSchemaUtils.getHCatSchema("a:int").getFields();
      List<HCatFieldSchema> pcols2 = HCatSchemaUtils.getHCatSchema("b:string").getFields();
      HCatTable table2 = (new HCatTable(dbName, tblName2)).cols(cols2).partCols(pcols2);
      sourceMetastore.createTable(HCatCreateTableDesc.create(table2).build());

      // 4: Add a partition P1 to T2 => 1 event.
      HCatTable table2Created = sourceMetastore.getTable(dbName, tblName2);
      Map<String, String> ptnDesc1 = new HashMap<String, String>();
      ptnDesc1.put("b", "test1");
      HCatPartition ptn1 = (new HCatPartition(table2Created, ptnDesc1,
          makePartLocation(table2Created, ptnDesc1)));
      sourceMetastore.addPartition(HCatAddPartitionDesc.create(ptn1).build());

      // 5: Create and drop a partition on T2, 20 times over => 40 events.
      for (int i = 0; i < 20; i++) {
        Map<String, String> ptnDesc = new HashMap<String, String>();
        ptnDesc.put("b", "testmul" + i);
        HCatPartition ptn = (new HCatPartition(table2Created, ptnDesc,
            makePartLocation(table2Created, ptnDesc)));
        sourceMetastore.addPartition(HCatAddPartitionDesc.create(ptn).build());
        sourceMetastore.dropPartitions(dbName, tblName2, ptnDesc, true);
      }

      // 6: Drop table T1 => 1 event.
      sourceMetastore.dropTable(dbName, tblName1, true);

      // 7: Drop table T2 => 1 event.
      sourceMetastore.dropTable(dbName, tblName2, true);

      // Verify that the number of events since we began is at least 25 more.
      long currId = sourceMetastore.getCurrentNotificationEventId();
      assertTrue("currId[" + currId + "] must be more than 25 greater than baseId[" + baseId + "]",
          currId > baseId + 25);
    }

    // Do the rest of the tests on the db we just picked up above.
    List<HCatNotificationEvent> notifs = sourceMetastore.getNextNotification(
        0, 0, new IMetaStoreClient.NotificationFilter() {
          @Override
          public boolean accept(NotificationEvent event) {
            return true;
          }
        });
    for (HCatNotificationEvent n : notifs) {
      LOG.info("notif from dblistener:" + n.getEventId() + ":" + n.getEventTime()
          + ",t:" + n.getEventType() + ",o:" + n.getDbName() + "." + n.getTableName());
    }

    Iterator<ReplicationTask> taskIter = sourceMetastore.getReplicationTasks(0, -1, dbName, null);
    while (taskIter.hasNext()) {
      ReplicationTask task = taskIter.next();
      HCatNotificationEvent n = task.getEvent();
      LOG.info("notif from tasks:" + n.getEventId() + ":" + n.getEventTime()
          + ",t:" + n.getEventType() + ",o:" + n.getDbName() + "." + n.getTableName()
          + ",s:" + n.getEventScope());
      LOG.info("task :" + task.getClass().getName());
      if (task.needsStagingDirs()) {
        StagingDirectoryProvider provider = new StagingDirectoryProvider() {
          @Override
          public String getStagingDirectory(String key) {
            LOG.info("getStagingDirectory(" + key + ") called!");
            return "/tmp/" + key.replaceAll(" ", "_");
          }
        };
        task.withSrcStagingDirProvider(provider)
            .withDstStagingDirProvider(provider);
      }
      if (task.isActionable()) {
        LOG.info("task was actionable!");
        Function<Command, String> commandDebugPrinter = new Function<Command, String>() {
          @Override
          public String apply(@Nullable Command cmd) {
            StringBuilder sb = new StringBuilder();
            // Check that the command survives a serialization round-trip.
            String serializedCmd = null;
            try {
              serializedCmd = ReplicationUtils.serializeCommand(cmd);
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
            sb.append("SERIALIZED:" + serializedCmd + "\n");
            Command command = null;
            try {
              command = ReplicationUtils.deserializeCommand(serializedCmd);
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
            sb.append("CMD:[" + command.getClass().getName() + "]\n");
            sb.append("EVENTID:[" + command.getEventId() + "]\n");
            for (String s : command.get()) {
              sb.append("CMD:" + s);
              sb.append("\n");
            }
            sb.append("Retriable:" + command.isRetriable() + "\n");
            sb.append("Undoable:" + command.isUndoable() + "\n");
            if (command.isUndoable()) {
              for (String s : command.getUndo()) {
                sb.append("UNDO:" + s);
                sb.append("\n");
              }
            }
            List<String> locns = command.cleanupLocationsPerRetry();
            sb.append("cleanupLocationsPerRetry entries :" + locns.size());
            for (String s : locns) {
              sb.append("RETRY_CLEANUP:" + s);
              sb.append("\n");
            }
            locns = command.cleanupLocationsAfterEvent();
            sb.append("cleanupLocationsAfterEvent entries :" + locns.size());
            for (String s : locns) {
              sb.append("AFTER_EVENT_CLEANUP:" + s);
              sb.append("\n");
            }
            return sb.toString();
          }
        };
        LOG.info("On src:");
        for (String s : Iterables.transform(task.getSrcWhCommands(), commandDebugPrinter)) {
          LOG.info(s);
        }
        LOG.info("On dest:");
        for (String s : Iterables.transform(task.getDstWhCommands(), commandDebugPrinter)) {
          LOG.info(s);
        }
      } else {
        LOG.info("task was not actionable.");
      }
    }
  }

  /**
   * Test for detecting schema-changes for an HCatalog table, across 2 different HCat instances.
   * A table is created with the same schema on 2 HCat instances. The table-schema is modified
   * on the source HCat instance (columns, I/O formats, SerDe definitions, etc.). The table
   * metadata is compared between source and target, and the changes are detected and
   * propagated to the target.
   * @throws Exception
   */
  @Test
  public void testTableSchemaPropagation() throws Exception {
    try {
      startReplicationTargetMetaStoreIfRequired();
      HCatClient sourceMetaStore = HCatClient.create(new Configuration(hcatConf));
      final String dbName = "myDb";
      final String tableName = "myTable";

      sourceMetaStore.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
      sourceMetaStore.createDatabase(HCatCreateDBDesc.create(dbName).build());
      List<HCatFieldSchema> columnSchema = Arrays.asList(
          new HCatFieldSchema("foo", Type.INT, ""),
          new HCatFieldSchema("bar", Type.STRING, ""));
      List<HCatFieldSchema> partitionSchema = Arrays.asList(
          new HCatFieldSchema("dt", Type.STRING, ""),
          new HCatFieldSchema("grid", Type.STRING, ""));
      HCatTable sourceTable = new HCatTable(dbName, tableName).cols(columnSchema)
          .partCols(partitionSchema);
      sourceMetaStore.createTable(HCatCreateTableDesc.create(sourceTable).build());

      // Verify that the sourceTable was created successfully.
      sourceTable = sourceMetaStore.getTable(dbName, tableName);
      assertNotNull("Table couldn't be queried for.", sourceTable);

      // Serialize the table definition. Deserialize it using the target HCatClient instance.
      String tableStringRep = sourceMetaStore.serializeTable(sourceTable);
      HCatClient targetMetaStore = HCatClient.create(new Configuration(replicationTargetHCatConf));
      targetMetaStore.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
      targetMetaStore.createDatabase(HCatCreateDBDesc.create(dbName).build());

      HCatTable targetTable = targetMetaStore.deserializeTable(tableStringRep);
      assertEquals("Table after deserialization should have been identical to sourceTable.",
          HCatTable.NO_DIFF, sourceTable.diff(targetTable));

      // Create the table on the target.
      targetMetaStore.createTable(HCatCreateTableDesc.create(targetTable).build());
      // Verify that the created table is identical to sourceTable.
      targetTable = targetMetaStore.getTable(dbName, tableName);
      assertEquals("Table after deserialization should have been identical to sourceTable.",
          HCatTable.NO_DIFF, sourceTable.diff(targetTable));

      // Modify sourceTable.
      List<HCatFieldSchema> newColumnSchema = new ArrayList<HCatFieldSchema>(columnSchema);
      newColumnSchema.add(new HCatFieldSchema("goo_new", Type.DOUBLE, ""));
      Map<String, String> tableParams = new HashMap<String, String>(1);
      tableParams.put("orc.compress", "ZLIB");
      sourceTable.cols(newColumnSchema)    // Add a column.
          .fileFormat("orcfile")           // Change SerDe, File I/O formats.
          .tblProps(tableParams)
          .serdeParam(serdeConstants.FIELD_DELIM, Character.toString('\001'));
      sourceMetaStore.updateTableSchema(dbName, tableName, sourceTable);
      sourceTable = sourceMetaStore.getTable(dbName, tableName);

      // Diff against the table on the target.
      EnumSet<HCatTable.TableAttribute> diff = targetTable.diff(sourceTable);
      assertTrue("Couldn't find change in column-schema.",
          diff.contains(HCatTable.TableAttribute.COLUMNS));
      assertTrue("Couldn't find change in InputFormat.",
          diff.contains(HCatTable.TableAttribute.INPUT_FORMAT));
      assertTrue("Couldn't find change in OutputFormat.",
          diff.contains(HCatTable.TableAttribute.OUTPUT_FORMAT));
      assertTrue("Couldn't find change in SerDe.",
          diff.contains(HCatTable.TableAttribute.SERDE));
      assertTrue("Couldn't find change in SerDe parameters.",
          diff.contains(HCatTable.TableAttribute.SERDE_PROPERTIES));
      assertTrue("Couldn't find change in Table parameters.",
          diff.contains(HCatTable.TableAttribute.TABLE_PROPERTIES));

      // Replicate the changes to the replicated-table.
      targetMetaStore.updateTableSchema(dbName, tableName,
          targetTable.resolve(sourceTable, diff));
      targetTable = targetMetaStore.getTable(dbName, tableName);
      assertEquals("After propagating schema changes, source and target tables should have been equivalent.",
          HCatTable.NO_DIFF, targetTable.diff(sourceTable));
    } catch (Exception unexpected) {
      LOG.error("Unexpected exception!", unexpected);
      assertTrue("Unexpected exception! " + unexpected.getMessage(), false);
    }
  }

  /**
   * Test that partition-definitions can be replicated between HCat-instances,
   * independently of table-metadata replication.
   * 2 identical tables are created on 2 different HCat instances ("source" and "target").
   * On the source instance:
   * 1. One partition is added with the old format ("TEXTFILE").
   * 2. The table is updated with an additional column, and the data-format is changed to ORC.
   * 3. Another partition is added with the new format.
   * 4. The partitions' metadata is copied to the target HCat instance, without updating the
   *    target table definition.
   * 5. The partitions' metadata is tested to be an exact replica of that on the source.
   * @throws Exception
   */
  @Test
  public void testPartitionRegistrationWithCustomSchema() throws Exception {
    try {
      startReplicationTargetMetaStoreIfRequired();
      HCatClient sourceMetaStore = HCatClient.create(new Configuration(hcatConf));
      final String dbName = "myDb";
      final String tableName = "myTable";

      sourceMetaStore.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
      sourceMetaStore.createDatabase(HCatCreateDBDesc.create(dbName).build());
      List<HCatFieldSchema> columnSchema = new ArrayList<HCatFieldSchema>(
          Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""),
              new HCatFieldSchema("bar", Type.STRING, "")));
      List<HCatFieldSchema> partitionSchema = Arrays.asList(
          new HCatFieldSchema("dt", Type.STRING, ""),
          new HCatFieldSchema("grid", Type.STRING, ""));
      HCatTable sourceTable = new HCatTable(dbName, tableName).cols(columnSchema)
          .partCols(partitionSchema)
          .comment("Source table.");
      sourceMetaStore.createTable(HCatCreateTableDesc.create(sourceTable).build());

      // Verify that the sourceTable was created successfully.
      sourceTable = sourceMetaStore.getTable(dbName, tableName);
      assertNotNull("Table couldn't be queried for.", sourceTable);

      // Partitions added now should inherit table-schema, properties, etc.
      Map<String, String> partitionSpec_1 = new HashMap<String, String>();
      partitionSpec_1.put("grid", "AB");
      partitionSpec_1.put("dt", "2011_12_31");
      HCatPartition sourcePartition_1 = new HCatPartition(sourceTable,
          partitionSpec_1, makePartLocation(sourceTable, partitionSpec_1));
      sourceMetaStore.addPartition(HCatAddPartitionDesc.create(sourcePartition_1).build());
      assertEquals("Unexpected number of partitions.",
          1, sourceMetaStore.getPartitions(dbName, tableName).size());
      // Verify that partition_1 was added correctly, and properties were
      // inherited from the HCatTable.
      HCatPartition addedPartition_1 = sourceMetaStore.getPartition(dbName, tableName, partitionSpec_1);
      assertEquals("Column schema doesn't match.",
          sourceTable.getCols(), addedPartition_1.getColumns());
      assertEquals("InputFormat doesn't match.",
          sourceTable.getInputFileFormat(), addedPartition_1.getInputFormat());
      assertEquals("OutputFormat doesn't match.",
          sourceTable.getOutputFileFormat(), addedPartition_1.getOutputFormat());
      assertEquals("SerDe doesn't match.",
          sourceTable.getSerdeLib(), addedPartition_1.getSerDe());
      assertEquals("SerDe params don't match.",
          sourceTable.getSerdeParams(), addedPartition_1.getSerdeParams());

      // Replicate the table definition.
      HCatClient targetMetaStore = HCatClient.create(new Configuration(replicationTargetHCatConf));
      targetMetaStore.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
      targetMetaStore.createDatabase(HCatCreateDBDesc.create(dbName).build());
      // Make a copy of the source-table, as would be done across class-loaders.
      HCatTable targetTable = targetMetaStore.deserializeTable(
          sourceMetaStore.serializeTable(sourceTable));
      targetMetaStore.createTable(HCatCreateTableDesc.create(targetTable).build());
      targetTable = targetMetaStore.getTable(dbName, tableName);
      assertEquals("Created table doesn't match the source.",
          HCatTable.NO_DIFF, targetTable.diff(sourceTable));

      // Modify the table schema at the source.
      List<HCatFieldSchema> newColumnSchema = new ArrayList<HCatFieldSchema>(columnSchema);
      newColumnSchema.add(new HCatFieldSchema("goo_new", Type.DOUBLE, ""));
      Map<String, String> tableParams = new HashMap<String, String>(1);
      tableParams.put("orc.compress", "ZLIB");
      sourceTable.cols(newColumnSchema)    // Add a column.
.fileFormat("orcfile") // Change SerDe, File I/O formats. .tblProps(tableParams) .serdeParam(serdeConstants.FIELD_DELIM, Character.toString('\001')); sourceMetaStore.updateTableSchema(dbName, tableName, sourceTable); sourceTable = sourceMetaStore.getTable(dbName, tableName); // Add another partition to the source. Map<String, String> partitionSpec_2 = new HashMap<String, String>(); partitionSpec_2.put("grid", "AB"); partitionSpec_2.put("dt", "2012_01_01"); HCatPartition sourcePartition_2 = new HCatPartition(sourceTable, partitionSpec_2, makePartLocation(sourceTable,partitionSpec_2)); sourceMetaStore.addPartition(HCatAddPartitionDesc.create(sourcePartition_2).build()); // The source table now has 2 partitions, one in TEXTFILE, the other in ORC. // Test adding these partitions to the target-table *without* replicating the table-change. List<HCatPartition> sourcePartitions = sourceMetaStore.getPartitions(dbName, tableName); assertEquals("Unexpected number of source partitions.", 2, sourcePartitions.size()); List<HCatAddPartitionDesc> addPartitionDescs = new ArrayList<HCatAddPartitionDesc>(sourcePartitions.size()); for (HCatPartition partition : sourcePartitions) { addPartitionDescs.add(HCatAddPartitionDesc.create(partition).build()); } targetMetaStore.addPartitions(addPartitionDescs); List<HCatPartition> targetPartitions = targetMetaStore.getPartitions(dbName, tableName); assertEquals("Expected the same number of partitions. ", sourcePartitions.size(), targetPartitions.size()); for (int i=0; i<targetPartitions.size(); ++i) { HCatPartition sourcePartition = sourcePartitions.get(i), targetPartition = targetPartitions.get(i); assertEquals("Column schema doesn't match.", sourcePartition.getColumns(), targetPartition.getColumns()); assertEquals("InputFormat doesn't match.", sourcePartition.getInputFormat(), targetPartition.getInputFormat()); assertEquals("OutputFormat doesn't match.", sourcePartition.getOutputFormat(), targetPartition.getOutputFormat()); assertEquals("SerDe doesn't match.", sourcePartition.getSerDe(), targetPartition.getSerDe()); assertEquals("SerDe params don't match.", sourcePartition.getSerdeParams(), targetPartition.getSerdeParams()); } } catch (Exception unexpected) { LOG.error( "Unexpected exception! ", unexpected); assertTrue("Unexpected exception! " + unexpected.getMessage(), false); } } /** * Test that partition-definitions can be replicated between HCat-instances, * independently of table-metadata replication, using PartitionSpec interfaces. * (This is essentially the same test as testPartitionRegistrationWithCustomSchema(), * transliterated to use the PartitionSpec APIs.) * 2 identical tables are created on 2 different HCat instances ("source" and "target"). * On the source instance, * 1. One partition is added with the old format ("TEXTFILE"). * 2. The table is updated with an additional column and the data-format changed to ORC. * 3. Another partition is added with the new format. * 4. The partitions' metadata is copied to the target HCat instance, without updating the target table definition. * 5. The partitions' metadata is tested to be an exact replica of that on the source. 
   * @throws Exception
   */
  @Test
  public void testPartitionSpecRegistrationWithCustomSchema() throws Exception {
    try {
      startReplicationTargetMetaStoreIfRequired();
      HCatClient sourceMetaStore = HCatClient.create(new Configuration(hcatConf));
      final String dbName = "myDb";
      final String tableName = "myTable";

      sourceMetaStore.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
      sourceMetaStore.createDatabase(HCatCreateDBDesc.create(dbName).build());
      List<HCatFieldSchema> columnSchema = new ArrayList<HCatFieldSchema>(
          Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""),
              new HCatFieldSchema("bar", Type.STRING, "")));
      List<HCatFieldSchema> partitionSchema = Arrays.asList(
          new HCatFieldSchema("dt", Type.STRING, ""),
          new HCatFieldSchema("grid", Type.STRING, ""));
      HCatTable sourceTable = new HCatTable(dbName, tableName).cols(columnSchema)
          .partCols(partitionSchema)
          .comment("Source table.");
      sourceMetaStore.createTable(HCatCreateTableDesc.create(sourceTable).build());

      // Verify that the sourceTable was created successfully.
      sourceTable = sourceMetaStore.getTable(dbName, tableName);
      assertNotNull("Table couldn't be queried for.", sourceTable);

      // Partitions added now should inherit table-schema, properties, etc.
      Map<String, String> partitionSpec_1 = new HashMap<String, String>();
      partitionSpec_1.put("grid", "AB");
      partitionSpec_1.put("dt", "2011_12_31");
      HCatPartition sourcePartition_1 = new HCatPartition(sourceTable,
          partitionSpec_1, makePartLocation(sourceTable, partitionSpec_1));
      sourceMetaStore.addPartition(HCatAddPartitionDesc.create(sourcePartition_1).build());
      assertEquals("Unexpected number of partitions.",
          1, sourceMetaStore.getPartitions(dbName, tableName).size());
      // Verify that partition_1 was added correctly, and properties were
      // inherited from the HCatTable.
      HCatPartition addedPartition_1 = sourceMetaStore.getPartition(dbName, tableName, partitionSpec_1);
      assertEquals("Column schema doesn't match.",
          sourceTable.getCols(), addedPartition_1.getColumns());
      assertEquals("InputFormat doesn't match.",
          sourceTable.getInputFileFormat(), addedPartition_1.getInputFormat());
      assertEquals("OutputFormat doesn't match.",
          sourceTable.getOutputFileFormat(), addedPartition_1.getOutputFormat());
      assertEquals("SerDe doesn't match.",
          sourceTable.getSerdeLib(), addedPartition_1.getSerDe());
      assertEquals("SerDe params don't match.",
          sourceTable.getSerdeParams(), addedPartition_1.getSerdeParams());

      // Replicate the table definition.
      HCatClient targetMetaStore = HCatClient.create(new Configuration(replicationTargetHCatConf));
      targetMetaStore.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
      targetMetaStore.createDatabase(HCatCreateDBDesc.create(dbName).build());
      // Make a copy of the source-table, as would be done across class-loaders.
      HCatTable targetTable = targetMetaStore.deserializeTable(
          sourceMetaStore.serializeTable(sourceTable));
      targetMetaStore.createTable(HCatCreateTableDesc.create(targetTable).build());
      targetTable = targetMetaStore.getTable(dbName, tableName);
      assertEquals("Created table doesn't match the source.",
          HCatTable.NO_DIFF, targetTable.diff(sourceTable));

      // Modify the table schema at the source.
      List<HCatFieldSchema> newColumnSchema = new ArrayList<HCatFieldSchema>(columnSchema);
      newColumnSchema.add(new HCatFieldSchema("goo_new", Type.DOUBLE, ""));
      Map<String, String> tableParams = new HashMap<String, String>(1);
      tableParams.put("orc.compress", "ZLIB");
      sourceTable.cols(newColumnSchema)    // Add a column.
          .fileFormat("orcfile")           // Change SerDe, File I/O formats.
          .tblProps(tableParams)
          .serdeParam(serdeConstants.FIELD_DELIM, Character.toString('\001'));
      sourceMetaStore.updateTableSchema(dbName, tableName, sourceTable);
      sourceTable = sourceMetaStore.getTable(dbName, tableName);

      // Add another partition to the source.
      Map<String, String> partitionSpec_2 = new HashMap<String, String>();
      partitionSpec_2.put("grid", "AB");
      partitionSpec_2.put("dt", "2012_01_01");
      HCatPartition sourcePartition_2 = new HCatPartition(sourceTable,
          partitionSpec_2, makePartLocation(sourceTable, partitionSpec_2));
      sourceMetaStore.addPartition(HCatAddPartitionDesc.create(sourcePartition_2).build());

      // The source table now has 2 partitions, one in TEXTFILE, the other in ORC.
      // Test adding these partitions to the target-table *without* replicating the table-change.
      HCatPartitionSpec sourcePartitionSpec = sourceMetaStore.getPartitionSpecs(dbName, tableName, -1);
      assertEquals("Unexpected number of source partitions.", 2, sourcePartitionSpec.size());

      // Serialize the HCatPartitionSpec.
      List<String> partitionSpecString = sourceMetaStore.serializePartitionSpec(sourcePartitionSpec);

      // Deserialize the HCatPartitionSpec using the target HCatClient instance.
      HCatPartitionSpec targetPartitionSpec = targetMetaStore.deserializePartitionSpec(partitionSpecString);
      assertEquals("Could not add the expected number of partitions.",
          sourcePartitionSpec.size(), targetMetaStore.addPartitionSpec(targetPartitionSpec));

      // Retrieve partitions.
      targetPartitionSpec = targetMetaStore.getPartitionSpecs(dbName, tableName, -1);
      assertEquals("Could not retrieve the expected number of partitions.",
          sourcePartitionSpec.size(), targetPartitionSpec.size());

      // Assert that the source and target partitions are equivalent.
      HCatPartitionSpec.HCatPartitionIterator sourceIterator = sourcePartitionSpec.getPartitionIterator();
      HCatPartitionSpec.HCatPartitionIterator targetIterator = targetPartitionSpec.getPartitionIterator();

      while (targetIterator.hasNext()) {
        assertTrue("Fewer target partitions than source.", sourceIterator.hasNext());
        HCatPartition sourcePartition = sourceIterator.next();
        HCatPartition targetPartition = targetIterator.next();
        assertEquals("Column schema doesn't match.",
            sourcePartition.getColumns(), targetPartition.getColumns());
        assertEquals("InputFormat doesn't match.",
            sourcePartition.getInputFormat(), targetPartition.getInputFormat());
        assertEquals("OutputFormat doesn't match.",
            sourcePartition.getOutputFormat(), targetPartition.getOutputFormat());
        assertEquals("SerDe doesn't match.",
            sourcePartition.getSerDe(), targetPartition.getSerDe());
        assertEquals("SerDe params don't match.",
            sourcePartition.getSerdeParams(), targetPartition.getSerdeParams());
      }
    } catch (Exception unexpected) {
      LOG.error("Unexpected exception! ", unexpected);
      assertTrue("Unexpected exception! " + unexpected.getMessage(), false);
    }
  }
}