/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.falcon.catalog;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.falcon.cluster.util.EmbeddedCluster;
import org.apache.falcon.entity.AbstractTestBase;
import org.apache.falcon.entity.ClusterHelper;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.Properties;
import org.apache.falcon.entity.v0.feed.Property;
import org.apache.falcon.util.HiveTestUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStore;
import org.apache.hcatalog.api.HCatPartition;
import org.apache.hcatalog.common.HCatException;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.List;
/**
 * Tests {@link CatalogPartitionHandler}, the plugin that registers catalog partitions on succeeded messages.
 *
 * <p>The tests run against an {@link EmbeddedCluster} and a Hive metastore that is started in-process on a
 * background thread. Each test starts from a clean slate: the catalog table and the data path are dropped
 * before every test method and the cluster entity is re-published.
 */
public class CatalogPartitionHandlerTest extends AbstractTestBase {
    private static final int METASTORE_PORT = 49083;
    private static final String CATALOG_DB = "default";
    public static final Path DATA_PATH = new Path("/projects/falcon/clicks/2014/06/18/18");
    public static final String CATALOG_TABLE = "clicks";

    /** Location passed to the metastore when the external table is created. */
    private static final String TABLE_LOCATION = "/projects/falcon/clicks";
    /** Name of the date partition column used by every test table. */
    private static final String DATE_PARTITION_KEY = "ds";
    /** Value of the date partition column the tests expect for {@link #DATA_PATH}. */
    private static final String DATE_PARTITION_VALUE = "2014-06-18-18";
    /** Value the tests expect in the country/region columns when the data path has no sub-directories. */
    private static final String NO_DATA = "NODATA";

    private Thread hcatServer;
    private EmbeddedCluster embeddedCluster;
    private String metastoreUrl;
    private CatalogPartitionHandler partHandler;

    /**
     * Creates the embedded cluster, starts the metastore on its own thread, creates the catalog database and
     * obtains the handler under test.
     *
     * @throws Exception if the cluster or the catalog database cannot be created
     */
    @BeforeClass
    public void setup() throws Exception {
        embeddedCluster = EmbeddedCluster.newCluster("testCluster");
        final String fsUrl = ClusterHelper.getStorageUrl(embeddedCluster.getCluster());

        hcatServer = new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    HiveConf hiveconf = new HiveConf();
                    hiveconf.set("hive.metastore.warehouse.dir",
                            new File("target/metastore").getAbsolutePath());
                    hiveconf.set("fs.default.name", fsUrl);
                    HiveMetaStore.startMetaStore(METASTORE_PORT, null, hiveconf);
                } catch (Throwable t) {
                    throw new RuntimeException(t);
                }
            }
        });
        // Daemon thread: a metastore that does not react to interruption must not keep the test JVM alive.
        hcatServer.setDaemon(true);
        hcatServer.start();

        // NOTE(review): assumes the registry endpoint of the embedded cluster points at METASTORE_PORT - confirm.
        metastoreUrl = ClusterHelper.getRegistryEndPoint(embeddedCluster.getCluster());
        HiveTestUtils.createDatabase(metastoreUrl, CATALOG_DB);
        partHandler = CatalogPartitionHandler.get();
    }

    /**
     * Resets the entity store, the catalog table and the data path, then re-publishes the cluster entity.
     *
     * @throws Exception if any of the reset steps fails
     */
    @BeforeMethod
    public void prepare() throws Exception {
        cleanupStore();
        HiveTestUtils.dropTable(metastoreUrl, CATALOG_DB, CATALOG_TABLE);
        embeddedCluster.getFileSystem().delete(DATA_PATH, true);
        store.publish(EntityType.CLUSTER, embeddedCluster.getCluster());
    }

    /**
     * Signals the metastore thread to stop.
     *
     * <p>{@code Thread.stop()} is deprecated and unsupported on recent JDKs, so the thread is interrupted
     * instead; it is a daemon thread, so it cannot block JVM exit even if it ignores the interrupt.
     */
    @AfterClass
    public void cleanup() {
        // setup() may have failed before the thread was created.
        if (hcatServer != null) {
            hcatServer.interrupt();
        }
    }

    /**
     * Stores a randomly named feed and creates the matching external catalog table.
     *
     * @param clearPartitions when {@code true} the feed's partitions are cleared and the table is partitioned
     *                        by {@code ds} only; otherwise the table is partitioned by {@code ds},
     *                        {@code country} and {@code region}
     * @return the stored feed, with its catalog table property set
     * @throws Exception if the feed cannot be stored or the table cannot be created
     */
    private Feed createFeed(boolean clearPartitions) throws Exception {
        Feed feed = (Feed) storeEntity(EntityType.FEED, "feed" + RandomStringUtils.randomAlphanumeric(10));

        List<String> tablePartitionKeys;
        if (clearPartitions) {
            feed.setPartitions(null);
            tablePartitionKeys = Arrays.asList(DATE_PARTITION_KEY);
        } else {
            tablePartitionKeys = Arrays.asList(DATE_PARTITION_KEY, "country", "region");
        }
        HiveTestUtils.createExternalTable(metastoreUrl, CATALOG_DB, CATALOG_TABLE, tablePartitionKeys,
                TABLE_LOCATION);

        feed.setProperties(getProperties(CatalogPartitionHandler.CATALOG_TABLE,
                "catalog:default:clicks#ds={YEAR}-{MONTH}-{DAY}-{HOUR}"));
        return feed;
    }

    /**
     * Verifies that a partition is registered only once the data path exists, and that handling the same path
     * again keeps the same partition (same create time, same location).
     */
    @Test
    public void testStaticPartitions() throws Exception {
        String clusterName = embeddedCluster.getCluster().getName();
        String feedName = createFeed(true).getName();
        String dataPath = DATA_PATH.toString();

        //no partition if data path doesn't exist
        partHandler.handlePartition(clusterName, feedName, dataPath, false);
        assertDatePartitionAbsent("Expected exception!");

        //success case
        embeddedCluster.getFileSystem().mkdirs(DATA_PATH);
        partHandler.handlePartition(clusterName, feedName, dataPath, false);
        HCatPartition partition = getDatePartition();
        Assert.assertNotNull(partition);
        assertLocation(partition, DATA_PATH);

        //re-run scenario
        partHandler.handlePartition(clusterName, feedName, dataPath, false);
        HCatPartition newPartition = getDatePartition();
        Assert.assertNotNull(newPartition);
        //validate that it's the same old partition, updated
        Assert.assertEquals(newPartition.getCreateTime(), partition.getCreateTime());
        assertLocation(newPartition, DATA_PATH);
    }

    /**
     * Verifies that a registered partition is gone after the handler is invoked with the last argument set
     * to {@code true}.
     */
    @Test
    public void testEviction() throws Exception {
        String clusterName = embeddedCluster.getCluster().getName();
        String feedName = createFeed(true).getName();
        String dataPath = DATA_PATH.toString();

        //add partition
        FileSystem fs = embeddedCluster.getFileSystem();
        fs.mkdirs(DATA_PATH);
        fs.mkdirs(new Path(DATA_PATH, "US"));
        partHandler.handlePartition(clusterName, feedName, dataPath, false);
        Assert.assertNotNull(getDatePartition());

        //drop partition
        partHandler.handlePartition(clusterName, feedName, dataPath, true);
        assertDatePartitionAbsent("expected HCatException");
    }

    /**
     * Verifies partition registration for a table with {@code country} and {@code region} partition columns:
     * placeholder values when the data path has no sub-directories, one partition per country/region
     * directory otherwise, and update of the existing partitions on re-run.
     */
    @Test
    public void testDynamicPartitions() throws Exception {
        String clusterName = embeddedCluster.getCluster().getName();
        String feedName = createFeed(false).getName();
        String dataPath = DATA_PATH.toString();
        FileSystem fs = embeddedCluster.getFileSystem();

        //no dynamic parts, partition should be registered with NODATA placeholders
        fs.mkdirs(DATA_PATH);
        partHandler.handlePartition(clusterName, feedName, dataPath, false);
        List<HCatPartition> partitions = getDatePartitions();
        Assert.assertNotNull(partitions);
        Assert.assertEquals(partitions.size(), 1);
        Assert.assertEquals(partitions.get(0).getValues(),
                Arrays.asList(DATE_PARTITION_VALUE, NO_DATA, NO_DATA));

        //success case
        fs.mkdirs(new Path(DATA_PATH, "US/CA"));
        fs.mkdirs(new Path(DATA_PATH, "IND/KA"));
        //Temporary files should not be considered
        fs.mkdirs(new Path(DATA_PATH, "IND/.log"));
        fs.create(new Path(DATA_PATH, "IND/_SUCCESS")).close();
        fs.create(new Path(DATA_PATH, "IND/part-file")).close();
        partHandler.handlePartition(clusterName, feedName, dataPath, false);
        partitions = getDatePartitions();
        Assert.assertNotNull(partitions);
        Assert.assertEquals(partitions.size(), 2);
        HCatPartition oldPart = partitions.get(0);
        Assert.assertEquals(oldPart.getValues(), Arrays.asList(DATE_PARTITION_VALUE, "IND", "KA"));
        assertLocation(oldPart, new Path(DATA_PATH, "IND/KA"));
        Assert.assertEquals(partitions.get(1).getValues(), Arrays.asList(DATE_PARTITION_VALUE, "US", "CA"));
        assertLocation(partitions.get(1), new Path(DATA_PATH, "US/CA"));

        //re-run scenario
        fs.delete(DATA_PATH, true);
        fs.mkdirs(new Path(DATA_PATH, "IND/TN"));
        fs.mkdirs(new Path(DATA_PATH, "IND/KA"));
        partHandler.handlePartition(clusterName, feedName, dataPath, false);
        partitions = getDatePartitions();
        Assert.assertNotNull(partitions);
        Assert.assertEquals(partitions.size(), 2);
        HCatPartition newPart = partitions.get(0);
        Assert.assertEquals(newPart.getValues(), Arrays.asList(DATE_PARTITION_VALUE, "IND", "KA"));
        assertLocation(newPart, new Path(DATA_PATH, "IND/KA"));
        Assert.assertEquals(newPart.getParameters().get(CatalogPartitionHandler.CREATE_TIME),
                oldPart.getParameters().get(CatalogPartitionHandler.CREATE_TIME)); //same partition
        long updatedAt = Long.parseLong(newPart.getParameters().get(CatalogPartitionHandler.UPDATE_TIME));
        long createdAt = Long.parseLong(oldPart.getParameters().get(CatalogPartitionHandler.CREATE_TIME));
        Assert.assertTrue(updatedAt > createdAt);
        Assert.assertEquals(partitions.get(1).getValues(), Arrays.asList(DATE_PARTITION_VALUE, "IND", "TN"));
        assertLocation(partitions.get(1), new Path(DATA_PATH, "IND/TN"));
    }

    /**
     * Looks up the single partition of the test table for the expected date value.
     *
     * @return the partition returned by the catalog
     * @throws Exception if the lookup fails; the tests rely on an {@link HCatException} when it is missing
     */
    private HCatPartition getDatePartition() throws Exception {
        return HiveTestUtils.getPartition(metastoreUrl, CATALOG_DB, CATALOG_TABLE, DATE_PARTITION_KEY,
                DATE_PARTITION_VALUE);
    }

    /**
     * Lists the partitions of the test table that match the expected date value.
     *
     * @return the matching partitions, in the order returned by the catalog
     * @throws Exception if the lookup fails
     */
    private List<HCatPartition> getDatePartitions() throws Exception {
        return HiveTestUtils.getPartitions(metastoreUrl, CATALOG_DB, CATALOG_TABLE, DATE_PARTITION_KEY,
                DATE_PARTITION_VALUE);
    }

    /**
     * Asserts that looking up the date partition fails with an {@link HCatException}.
     *
     * @param failureMessage message used to fail the test when the lookup unexpectedly succeeds
     * @throws Exception if the lookup fails with anything other than an {@link HCatException}
     */
    private void assertDatePartitionAbsent(String failureMessage) throws Exception {
        try {
            getDatePartition();
        } catch (HCatException expected) {
            //expected
            return;
        }
        Assert.fail(failureMessage);
    }

    /**
     * Asserts that the path component of the partition's location equals the given path.
     *
     * @param partition partition whose location is checked
     * @param expected  expected path, compared without scheme and authority
     */
    private static void assertLocation(HCatPartition partition, Path expected) {
        Assert.assertEquals(new Path(partition.getLocation()).toUri().getPath(), expected.toString());
    }

    /**
     * Builds a feed {@link Properties} holder containing a single property.
     *
     * @param name  property name
     * @param value property value
     * @return properties with exactly the given name/value pair
     */
    private Properties getProperties(String name, String value) {
        Properties props = new Properties();
        Property prop = new Property();
        prop.setName(name);
        prop.setValue(value);
        props.getProperties().add(prop);
        return props;
    }
}