/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.falcon.regression.prism;
import org.apache.falcon.regression.Entities.FeedMerlin;
import org.apache.falcon.regression.core.bundle.Bundle;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.feed.ActionType;
import org.apache.falcon.entity.v0.feed.ClusterType;
import org.apache.falcon.regression.core.response.ServiceResponse;
import org.apache.falcon.regression.core.util.AssertUtil;
import org.apache.falcon.regression.core.util.BundleUtil;
import org.apache.falcon.regression.core.util.HadoopUtil;
import org.apache.falcon.regression.core.util.InstanceUtil;
import org.apache.falcon.regression.core.util.OSUtil;
import org.apache.falcon.regression.core.util.OozieUtil;
import org.apache.falcon.regression.core.util.TimeUtil;
import org.apache.falcon.regression.core.util.Util;
import org.apache.falcon.regression.testHelper.BaseTestClass;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.apache.oozie.client.CoordinatorAction;
import org.apache.oozie.client.OozieClient;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* Replication feed with partitions as expression language variables tests.
*/
@Test(groups = "distributed")
public class PrismFeedReplicationPartitionExpTest extends BaseTestClass {
// File systems and Oozie clients of the three servers, taken by index (0, 1, 2) from the
// serverFS / serverOC lists (presumably provided by BaseTestClass - confirm).
private FileSystem cluster1FS = serverFS.get(0);
private FileSystem cluster2FS = serverFS.get(1);
private FileSystem cluster3FS = serverFS.get(2);
private OozieClient cluster1OC = serverOC.get(0);
private OozieClient cluster2OC = serverOC.get(1);
private OozieClient cluster3OC = serverOC.get(2);
// Date/hour part of every data path; the tests append a minute directory ("00", "05", ...) to it.
private String testDate = "/2012/10/01/12/";
// Root directory of this test class; all paths below are relative to it.
private String baseTestDir = cleanAndGetTestDir();
// Feed roots used by the tests (source and target data locations).
private String testBaseDir1 = baseTestDir + "/localDC/rc/billing";
private String testBaseDir2 = baseTestDir + "/clusterPath/localDC/rc/billing";
private String testBaseDir3 = baseTestDir + "/dataBillingRC/fetlrc/billing";
private String testBaseDir4 = baseTestDir + "/sourcetarget";
// Root of the data uploaded to server 1 for the tests where server 1 acts as a source.
private String testBaseDirServer1Source = baseTestDir + "/source1";
// The roots above combined with testDate; createTestData() uploads below these.
private String testDirWithDate = testBaseDir1 + testDate;
private String testDirWithDateSourceTarget = testBaseDir4 + testDate;
private String testDirWithDateSource1 = testBaseDirServer1Source + testDate;
// Files uploaded as test data by createTestData().
private String testFile1 = OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile.xml");
private String testFile2 = OSUtil.concat(OSUtil.RESOURCES, "pig", "id.pig");
private String testFile3 = OSUtil.concat(OSUtil.RESOURCES, "ELbundle", "cluster-0.1.xml");
private String testFile4 = OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile.properties");
private static final Logger LOGGER =
Logger.getLogger(PrismFeedReplicationPartitionExpTest.class);
// Abbreviations:
// pt: partition in target
// ps: partition in source
/**
 * Recreates {@code location} on the file system of server 3 and copies {@code fileName}
 * into it.
 *
 * @param location directory that is recreated and then filled
 * @param fileName file handed to {@code HadoopUtil.copyDataToFolder}
 * @throws IOException if one of the file system operations fails
 */
private void uploadDataToServer3(String location, String fileName) throws IOException {
    final FileSystem server3 = cluster3FS;
    HadoopUtil.recreateDir(server3, location);
    HadoopUtil.copyDataToFolder(server3, location, fileName);
}
/**
 * Recreates {@code location} on the file system of server 1 and copies {@code fileName}
 * into it.
 *
 * @param location directory that is recreated and then filled
 * @param fileName file handed to {@code HadoopUtil.copyDataToFolder}
 * @throws IOException if one of the file system operations fails
 */
private void uploadDataToServer1(String location, String fileName) throws IOException {
    final FileSystem server1 = cluster1FS;
    HadoopUtil.recreateDir(server1, location);
    HadoopUtil.copyDataToFolder(server1, location, fileName);
}
/**
 * Uploads the input data shared by all tests of this class.
 *
 * <p>Server 3 receives, below two feed roots, the minute directories 00..20 with one
 * sub-directory per colo (ua2, ua1, ua3), plus a nested ua3/{ua2,ua1} layout for the test
 * that partitions both source and target. Server 1 receives the minute directories 00 and 05
 * for the tests in which it acts as a source.
 */
@BeforeClass(alwaysRun = true)
public void createTestData() throws Exception {
    LOGGER.info("creating test data");

    // Minute directories and the file stored in each of them (index-aligned).
    final String[] minutes = {"00", "05", "10", "15", "20"};
    final String[] files = {testFile1, testFile2, testFile3, testFile4, testFile4};
    final String[] colos = {"ua2", "ua1", "ua3"};

    // <root>/<minute>/<colo>/ on server 3, for both feed roots
    final String[] server3Roots = {testDirWithDate, testBaseDir3 + testDate};
    for (String root : server3Roots) {
        for (String colo : colos) {
            for (int i = 0; i < minutes.length; i++) {
                uploadDataToServer3(root + minutes[i] + "/" + colo + "/", files[i]);
            }
        }
    }

    // data for the test where both source and target partitions are required:
    // <root>/<minute>/ua3/<target colo>/
    final String[] targetColos = {"ua2", "ua1"};
    for (String targetColo : targetColos) {
        for (int i = 0; i < minutes.length; i++) {
            uploadDataToServer3(
                testDirWithDateSourceTarget + minutes[i] + "/ua3/" + targetColo + "/", files[i]);
        }
    }

    // data when server 1 acts as source: only the first two minute directories
    for (String colo : colos) {
        for (int i = 0; i < 2; i++) {
            uploadDataToServer1(testDirWithDateSource1 + minutes[i] + "/" + colo + "/", files[i]);
        }
    }

    LOGGER.info("completed creating test data");
}
/**
 * Builds one uniquely named bundle per server (three in total) from the feed replication
 * bundle template before every test method.
 */
@BeforeMethod(alwaysRun = true)
public void setup() throws Exception {
    final Bundle template = BundleUtil.readFeedReplicationBundle();
    for (int serverIndex = 0; serverIndex < 3; serverIndex++) {
        final Bundle serverBundle = new Bundle(template, servers.get(serverIndex));
        bundles[serverIndex] = serverBundle;
        serverBundle.generateUniqueBundle(this);
    }
}
/**
 * Cleans up after every test method: removes the feed root directories from the file systems
 * of server 1 and server 2 and deletes the entities created by the test.
 *
 * <p>Entity removal runs in a {@code finally} block so that a failing HDFS cleanup cannot
 * leave entities behind for the following tests.
 *
 * @throws Exception if the directory cleanup or the entity removal fails
 */
@AfterMethod(alwaysRun = true)
public void tearDown() throws Exception {
    try {
        for (String dir : new String[]{testBaseDir1, testBaseDir2, testBaseDir3, testBaseDir4}) {
            HadoopUtil.deleteDirIfExists(dir, cluster1FS);
            HadoopUtil.deleteDirIfExists(dir, cluster2FS);
        }
    } finally {
        removeTestClassEntities();
    }
}
/**
 * Negative test: every feed cluster (two sources, one target) declares an empty partition
 * expression. The feed is only submitted and the submission is expected to fail.
 */
@Test(enabled = true, groups = "embedded")
public void blankPartition() throws Exception {
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    final String firstSourceName = Util.readEntityName(bundles[0].getClusters().get(0));
    final String targetName = Util.readEntityName(bundles[1].getClusters().get(0));
    final String secondSourceName = Util.readEntityName(bundles[2].getClusters().get(0));
    final String retention = "days(1000000)";
    final String blankPartition = "";

    final FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.clearFeedClusters();

    // source on server 1, partition left blank
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(firstSourceName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:05Z", "2012-10-01T12:10Z")
        .withClusterType(ClusterType.SOURCE)
        .withPartition(blankPartition)
        .withDataLocation(testBaseDir1 + MINUTE_DATE_PATTERN)
        .build());

    // target on server 2, partition left blank
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(targetName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:10Z", "2012-10-01T12:25Z")
        .withClusterType(ClusterType.TARGET)
        .withPartition(blankPartition)
        .withDataLocation(testBaseDir2 + MINUTE_DATE_PATTERN)
        .build());

    // second source on server 3, partition left blank, no explicit data location
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(secondSourceName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:00Z", "2099-01-01T00:00Z")
        .withClusterType(ClusterType.SOURCE)
        .withPartition(blankPartition)
        .build());

    final String feedXml = feed.toString();
    LOGGER.info("feed: " + Util.prettyPrintXml(feedXml));

    final ServiceResponse submitResponse = prism.getFeedHelper().submitEntity(feedXml);
    TimeUtil.sleepSeconds(10);
    AssertUtil.assertFailed(submitResponse,
        "submit of feed should have failed as the partition in source is blank");
}
/**
 * Happy path with one source (server 3), one target (server 2) and one cluster without an
 * explicit type (server 1). Only the source declares the {@code ${cluster.colo}} partition
 * and the target uses a customized data location.
 *
 * <p>The test expects two replication instances to succeed on server 2, exactly one
 * replication coordinator on server 2 and one retention coordinator on every server, no
 * "ua1"/"ua2" paths on the target, and equal file counts between the source ua3 directories
 * and the replicated minute directories.
 */
@Test(enabled = true)
public void normalTest1Source1Target1NeutralPartitionedSource() throws Exception {
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    final String neutralName = Util.readEntityName(bundles[0].getClusters().get(0));
    final String targetName = Util.readEntityName(bundles[1].getClusters().get(0));
    final String sourceName = Util.readEntityName(bundles[2].getClusters().get(0));
    final String validityStart = "2012-10-01T12:00Z";
    final String retention = "days(100000)";

    final FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.clearFeedClusters();

    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(neutralName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity(validityStart, "2099-10-01T12:10Z")
        .build());

    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(targetName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity(validityStart, "2099-10-01T12:25Z")
        .withClusterType(ClusterType.TARGET)
        .withDataLocation(testBaseDir2 + MINUTE_DATE_PATTERN)
        .build());

    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(sourceName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:00Z", "2099-01-01T00:00Z")
        .withClusterType(ClusterType.SOURCE)
        .withPartition("${cluster.colo}")
        .withDataLocation(testBaseDir1 + MINUTE_DATE_PATTERN)
        .build());

    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));

    ServiceResponse response = prism.getFeedHelper().submitEntity(feed.toString());
    TimeUtil.sleepSeconds(10);
    AssertUtil.assertSucceeded(response);

    response = prism.getFeedHelper().schedule(feed.toString());
    AssertUtil.assertSucceeded(response);
    TimeUtil.sleepSeconds(15);

    // refresh the ua3 source data of the first two instances after scheduling
    final String source00 = testDirWithDate + "00/ua3/";
    final String source05 = testDirWithDate + "05/ua3/";
    HadoopUtil.recreateDir(cluster3FS, source00);
    HadoopUtil.recreateDir(cluster3FS, source05);
    HadoopUtil.copyDataToFolder(cluster3FS, source00, testFile1);
    HadoopUtil.copyDataToFolder(cluster3FS, source05, testFile2);

    InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 2,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED, 20);

    // one replication coordinator on the target, one retention coordinator everywhere
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "REPLICATION"), 1);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "RETENTION"), 1);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "RETENTION"), 1);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "RETENTION"), 1);

    // nothing belonging to ua1 or ua2 may show up on the target
    final List<Path> everythingOnTarget =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(testBaseDir2));
    AssertUtil.failIfStringFoundInPath(everythingOnTarget, "ua1", "ua2");

    // the replicated minute directories must hold as many files as the ua3 source directories
    final List<Path> sourceFiles00 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster3FS, new Path(source00));
    final List<Path> sourceFiles05 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster3FS, new Path(source05));
    final List<Path> targetFiles00 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster2FS, new Path(testBaseDir2 + testDate + "00"));
    final List<Path> targetFiles05 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster2FS, new Path(testBaseDir2 + testDate + "05"));

    AssertUtil.checkForListSizes(sourceFiles00, targetFiles00);
    AssertUtil.checkForListSizes(sourceFiles05, targetFiles05);
}
/**
 * Happy path with one source (server 3), one target (server 2) and one cluster without an
 * explicit type (server 1). Only the target declares the {@code ${cluster.colo}} partition
 * and it uses a customized data location.
 *
 * <p>The test expects two replication instances to succeed on server 2, exactly one
 * replication coordinator on server 2 and one retention coordinator on every server, no
 * "ua1"/"ua3" paths on the target, and equal file counts between the source ua2 directories
 * and the replicated minute directories.
 */
@Test(enabled = true)
public void normalTest1Source1Target1NeutralPartitionedTarget() throws Exception {
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    final String neutralName = Util.readEntityName(bundles[0].getClusters().get(0));
    final String targetName = Util.readEntityName(bundles[1].getClusters().get(0));
    final String sourceName = Util.readEntityName(bundles[2].getClusters().get(0));
    final String validityStart = "2012-10-01T12:00Z";
    final String retention = "days(1000000)";

    final FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.clearFeedClusters();

    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(neutralName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity(validityStart, "2099-10-01T12:10Z")
        .build());

    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(targetName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity(validityStart, "2099-10-01T12:25Z")
        .withClusterType(ClusterType.TARGET)
        .withPartition("${cluster.colo}")
        .withDataLocation(testBaseDir2 + MINUTE_DATE_PATTERN)
        .build());

    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(sourceName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:00Z", "2099-01-01T00:00Z")
        .withClusterType(ClusterType.SOURCE)
        .withDataLocation(testBaseDir1 + MINUTE_DATE_PATTERN)
        .build());

    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));

    final ServiceResponse response = prism.getFeedHelper().submitAndSchedule(feed.toString());
    TimeUtil.sleepSeconds(10);
    AssertUtil.assertSucceeded(response);

    InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 2,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED, 20);

    // one replication coordinator on the target, one retention coordinator everywhere
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "REPLICATION"), 1);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "RETENTION"), 1);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster1OC, feed.getName(), "RETENTION"), 1);
    Assert.assertEquals(
        OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "RETENTION"), 1);

    // nothing belonging to ua1 or ua3 may show up on the target
    final List<Path> everythingOnTarget =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(testBaseDir2));
    AssertUtil.failIfStringFoundInPath(everythingOnTarget, "ua1", "ua3");

    // the replicated minute directories must hold as many files as the ua2 source directories
    final List<Path> sourceFiles00 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster3FS, new Path(testDirWithDate + "00/ua2/"));
    final List<Path> sourceFiles05 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster3FS, new Path(testDirWithDate + "05/ua2/"));
    final List<Path> targetFiles00 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster2FS, new Path(testBaseDir2 + testDate + "00"));
    final List<Path> targetFiles05 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster2FS, new Path(testBaseDir2 + testDate + "05"));

    AssertUtil.checkForListSizes(sourceFiles00, targetFiles00);
    AssertUtil.checkForListSizes(sourceFiles05, targetFiles05);
}
/**
 * Happy path with one source (server 3) and two targets (server 1 and server 2) where only
 * the targets declare the {@code ${cluster.colo}} partition. All clusters use the feed-level
 * path {@code testBaseDir3 + MINUTE_DATE_PATTERN}.
 *
 * <p>The test expects one replication instance to succeed on server 1 and three on server 2,
 * no "ua2"/"ua3" paths on server 1, no "ua1"/"ua3" paths on server 2, nothing replicated to
 * the "00" directory on server 1, and equal file counts between each replicated minute
 * directory and the matching colo directory of the source on server 3.
 */
@Test(enabled = true)
public void normalTest1Source2TargetPartitionedTarget() throws Exception {
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);
    String startTimeUA1 = "2012-10-01T12:05Z";
    String startTimeUA2 = "2012-10-01T12:10Z";
    FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.setFilePath(testBaseDir3 + MINUTE_DATE_PATTERN);
    feed.clearFeedClusters();

    // target on server 1, partitioned by colo
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("days(1000000)", ActionType.DELETE)
            .withValidity(startTimeUA1, "2012-10-01T12:10Z")
            .withClusterType(ClusterType.TARGET)
            .withPartition("${cluster.colo}")
            .build());

    // target on server 2, partitioned by colo
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
            .withRetention("days(1000000)", ActionType.DELETE)
            .withValidity(startTimeUA2, "2012-10-01T12:25Z")
            .withClusterType(ClusterType.TARGET)
            .withPartition("${cluster.colo}")
            .build());

    // source on server 3, no partition
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("days(1000000)", ActionType.DELETE)
            .withValidity("2012-10-01T12:00Z", "2099-01-01T00:00Z")
            .withClusterType(ClusterType.SOURCE)
            .build());

    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));
    ServiceResponse r = prism.getFeedHelper().submitEntity(feed.toString());
    TimeUtil.sleepSeconds(10);
    AssertUtil.assertSucceeded(r);
    AssertUtil.assertSucceeded(prism.getFeedHelper().schedule(feed.toString()));
    TimeUtil.sleepSeconds(15);

    InstanceUtil.waitTillInstanceReachState(cluster1OC, feed.getName(), 1,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED, 20);
    InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 3,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED, 20);

    // server 1 must not contain anything of ua2 or ua3
    List<Path> ua1ReplicatedData = HadoopUtil
        .getAllFilesRecursivelyHDFS(cluster1FS, new Path(testBaseDir3 + testDate));
    AssertUtil.failIfStringFoundInPath(ua1ReplicatedData, "ua2", "ua3");

    // server 2 must not contain anything of ua1 or ua3
    List<Path> ua2ReplicatedData = HadoopUtil
        .getAllFilesRecursivelyHDFS(cluster2FS, new Path(testBaseDir3 + testDate));
    AssertUtil.failIfStringFoundInPath(ua2ReplicatedData, "ua1", "ua3");

    // replicated data per minute directory on the two targets
    List<Path> ua1ReplicatedData00 = HadoopUtil
        .getAllFilesRecursivelyHDFS(cluster1FS, new Path(testBaseDir3 + testDate + "00/"));
    List<Path> ua1ReplicatedData10 = HadoopUtil
        .getAllFilesRecursivelyHDFS(cluster1FS, new Path(testBaseDir3 + testDate + "10/"));
    List<Path> ua2ReplicatedData10 = HadoopUtil
        .getAllFilesRecursivelyHDFS(cluster2FS, new Path(testBaseDir3 + testDate + "10"));
    List<Path> ua2ReplicatedData15 = HadoopUtil
        .getAllFilesRecursivelyHDFS(cluster2FS, new Path(testBaseDir3 + testDate + "15"));

    // Original data is read from the source (server 3). Reading it from a target would
    // compare the replicated data with itself and make the size checks meaningless.
    List<Path> ua3OriginalData10ua1 = HadoopUtil
        .getAllFilesRecursivelyHDFS(cluster3FS, new Path(testBaseDir3 + testDate + "10/ua1"));
    List<Path> ua3OriginalData10ua2 = HadoopUtil
        .getAllFilesRecursivelyHDFS(cluster3FS, new Path(testBaseDir3 + testDate + "10/ua2"));
    List<Path> ua3OriginalData15ua2 = HadoopUtil
        .getAllFilesRecursivelyHDFS(cluster3FS, new Path(testBaseDir3 + testDate + "15/ua2"));

    // instance 00 is expected to be absent on server 1
    AssertUtil.checkForListSizes(ua1ReplicatedData00, new ArrayList<Path>());
    // number of replicated files should be the same as on the source
    AssertUtil.checkForListSizes(ua1ReplicatedData10, ua3OriginalData10ua1);
    AssertUtil.checkForListSizes(ua2ReplicatedData10, ua3OriginalData10ua2);
    AssertUtil.checkForListSizes(ua2ReplicatedData15, ua3OriginalData15ua2);
}
/**
 * Negative test with two source clusters (server 1 and server 3) and one target (server 2)
 * where only the target declares a partition expression. Since none of the sources has a
 * partition expression, the feed submission is expected to fail (FALCON-305) with a message
 * that names the first source cluster.
 */
@Test(enabled = true, groups = "embedded")
public void normalTest2Source1TargetPartitionedTarget() throws Exception {
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    final String firstSourceName = Util.readEntityName(bundles[0].getClusters().get(0));
    final String targetName = Util.readEntityName(bundles[1].getClusters().get(0));
    final String secondSourceName = Util.readEntityName(bundles[2].getClusters().get(0));
    final String retention = "days(1000000)";

    final FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.clearFeedClusters();

    // source on server 1, no partition
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(firstSourceName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:05Z", "2012-10-01T12:10Z")
        .withClusterType(ClusterType.SOURCE)
        .withDataLocation(testBaseDir1 + MINUTE_DATE_PATTERN)
        .build());

    // target on server 2, partitioned by colo
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(targetName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:10Z", "2012-10-01T12:25Z")
        .withClusterType(ClusterType.TARGET)
        .withPartition("${cluster.colo}")
        .withDataLocation(testBaseDir2 + MINUTE_DATE_PATTERN)
        .build());

    // source on server 3, no partition
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(secondSourceName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:00Z", "2099-01-01T00:00Z")
        .withClusterType(ClusterType.SOURCE)
        .build());

    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));

    final ServiceResponse response = prism.getFeedHelper().submitEntity(feed.toString());
    AssertUtil.assertFailed(response, "Submission of feed should have failed.");

    final String expectedError = "Partition expression has to be specified for cluster "
        + firstSourceName
        + " as there are more than one source clusters";
    final boolean hasExpectedError = response.getMessage().contains(expectedError);
    Assert.assertTrue(hasExpectedError, "Failed response has unexpected error message.");
}
/**
 * Happy path with one source (server 3) and two targets (server 1 and server 2) where only
 * the source declares the {@code ${cluster.colo}} partition. Each target replicates into its
 * own data location ("/ua1" on server 1, "/ua2" on server 2) below {@code testBaseDir1}.
 *
 * <p>The test expects one replication instance to succeed on server 1 and two on server 2,
 * no "ua2" paths on server 1, no "ua1" paths on server 2, and equal file counts between each
 * replicated minute directory and the matching colo directory of the source on server 3.
 */
@Test(enabled = true)
public void normalTest1Source2TargetPartitionedSource() throws Exception {
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    final String firstTargetName = Util.readEntityName(bundles[0].getClusters().get(0));
    final String secondTargetName = Util.readEntityName(bundles[1].getClusters().get(0));
    final String sourceName = Util.readEntityName(bundles[2].getClusters().get(0));
    final String retention = "days(10000000)";

    final FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.setFilePath(testBaseDir1 + MINUTE_DATE_PATTERN);
    feed.clearFeedClusters();

    // target on server 1 with its own data location
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(firstTargetName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:05Z", "2012-10-01T12:11Z")
        .withClusterType(ClusterType.TARGET)
        .withDataLocation(testBaseDir1 + "/ua1" + MINUTE_DATE_PATTERN)
        .build());

    // target on server 2 with its own data location
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(secondTargetName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:10Z", "2012-10-01T12:26Z")
        .withClusterType(ClusterType.TARGET)
        .withDataLocation(testBaseDir1 + "/ua2" + MINUTE_DATE_PATTERN)
        .build());

    // source on server 3, partitioned by colo
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(sourceName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:00Z", "2099-01-01T00:00Z")
        .withClusterType(ClusterType.SOURCE)
        .withPartition("${cluster.colo}")
        .build());

    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));

    final ServiceResponse submitResponse = prism.getFeedHelper().submitEntity(feed.toString());
    TimeUtil.sleepSeconds(10);
    AssertUtil.assertSucceeded(submitResponse);
    AssertUtil.assertSucceeded(prism.getFeedHelper().schedule(feed.toString()));
    TimeUtil.sleepSeconds(15);

    InstanceUtil.waitTillInstanceReachState(cluster1OC, feed.getName(), 1,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED, 20);
    InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 2,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED, 20);

    final String server1Root = testBaseDir1 + "/ua1" + testDate;
    final String server2Root = testBaseDir1 + "/ua2" + testDate;

    // server 1 must not contain anything of ua2
    final List<Path> allOnServer1 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster1FS, new Path(server1Root));
    AssertUtil.failIfStringFoundInPath(allOnServer1, "ua2");

    // server 2 must not contain anything of ua1
    final List<Path> allOnServer2 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(server2Root));
    AssertUtil.failIfStringFoundInPath(allOnServer2, "ua1");

    // replicated data per minute directory
    final List<Path> server1Minute05 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster1FS, new Path(server1Root + "05/"));
    final List<Path> server1Minute10 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster1FS, new Path(server1Root + "10/"));
    final List<Path> server2Minute10 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(server2Root + "10"));
    final List<Path> server2Minute15 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(server2Root + "15"));

    // original data on the source
    final List<Path> sourceMinute10Ua1 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster3FS, new Path(testDirWithDate + "10/ua1"));
    final List<Path> sourceMinute05Ua1 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster3FS, new Path(testDirWithDate + "05/ua1"));
    final List<Path> sourceMinute10Ua2 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster3FS, new Path(testDirWithDate + "10/ua2"));
    final List<Path> sourceMinute15Ua2 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster3FS, new Path(testDirWithDate + "15/ua2"));

    // number of replicated files should be the same as on the source
    AssertUtil.checkForListSizes(server1Minute10, sourceMinute10Ua1);
    AssertUtil.checkForListSizes(server1Minute05, sourceMinute05Ua1);
    AssertUtil.checkForListSizes(server2Minute10, sourceMinute10Ua2);
    AssertUtil.checkForListSizes(server2Minute15, sourceMinute15Ua2);
}
/**
 * Happy path with two sources (server 1 and server 3) and one target (server 2). Both
 * sources declare the {@code ${cluster.colo}} partition, each with its own data location;
 * the target replicates into {@code testBaseDir2 + "/replicated"}.
 *
 * <p>The test expects two replication instances to succeed on server 2, no "ua2" paths on
 * the target, and equal file counts between the replicated "00/ua1" and "05/ua3" directories
 * and the corresponding source directories on server 1 and server 3.
 */
@Test(enabled = true)
public void normalTest2Source1TargetPartitionedSource() throws Exception {
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);
    String startTimeUA1 = "2012-10-01T12:00Z";
    String startTimeUA2 = "2012-10-01T12:00Z";
    FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.clearFeedClusters();

    // source on server 1, partitioned by colo, reading from its dedicated source directory
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("days(1000000)", ActionType.DELETE)
            .withValidity(startTimeUA1, "2099-10-01T12:10Z")
            .withClusterType(ClusterType.SOURCE)
            .withPartition("${cluster.colo}")
            .withDataLocation(testBaseDirServer1Source + MINUTE_DATE_PATTERN)
            .build());

    // target on server 2 with a customized data location
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
            .withRetention("days(1000000)", ActionType.DELETE)
            .withValidity(startTimeUA2, "2099-10-01T12:25Z")
            .withClusterType(ClusterType.TARGET)
            .withDataLocation(testBaseDir2 + "/replicated" + MINUTE_DATE_PATTERN)
            .build());

    // source on server 3, partitioned by colo
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("days(1000000)", ActionType.DELETE)
            .withValidity("2012-10-01T12:00Z", "2099-01-01T00:00Z")
            .withClusterType(ClusterType.SOURCE)
            .withPartition("${cluster.colo}")
            .withDataLocation(testBaseDir1 + MINUTE_DATE_PATTERN)
            .build());

    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));
    ServiceResponse r = prism.getFeedHelper().submitEntity(feed.toString());
    TimeUtil.sleepSeconds(10);
    AssertUtil.assertSucceeded(r);
    r = prism.getFeedHelper().schedule(feed.toString());
    AssertUtil.assertSucceeded(r);
    TimeUtil.sleepSeconds(15);

    InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 2,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED, 20);

    // the target must not contain anything of ua2
    List<Path> ua2ReplicatedData = HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS,
        new Path(testBaseDir2 + "/replicated" + testDate));
    AssertUtil.failIfStringFoundInPath(ua2ReplicatedData, "ua2");

    // replicated data on the target, one directory per source colo
    List<Path> ua2ReplicatedData00ua1 = HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS,
        new Path(testBaseDir2 + "/replicated" + testDate + "00/ua1"));
    List<Path> ua2ReplicatedData05ua3 = HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS,
        new Path(testBaseDir2 + "/replicated" + testDate + "05/ua3/"));

    // original data on the two sources
    List<Path> ua1OriginalData00 = HadoopUtil
        .getAllFilesRecursivelyHDFS(cluster1FS, new Path(
            testBaseDirServer1Source + testDate + "00/ua1"));
    // The replicated "05/ua3" directory is compared with the source's "05/ua3" directory;
    // comparing it with "05/ua1" would check against data of a different partition.
    List<Path> ua3OriginalData05 = HadoopUtil
        .getAllFilesRecursivelyHDFS(cluster3FS, new Path(testDirWithDate + "05/ua3"));

    // number of replicated files should be the same as on the sources
    AssertUtil.checkForListSizes(ua2ReplicatedData00ua1, ua1OriginalData00);
    AssertUtil.checkForListSizes(ua2ReplicatedData05ua3, ua3OriginalData05);
}
/**
 * Happy path with one source (server 3) and two targets (server 1 and server 2) where the
 * source and both targets declare the {@code ${cluster.colo}} partition. The source reads
 * from {@code testBaseDir4}; each target replicates into its own data location ("/ua1" on
 * server 1, "/ua2" on server 2) below {@code testBaseDir1}.
 *
 * <p>The test expects one replication instance to succeed on server 1 and two on server 2,
 * no "ua2" paths on server 1, no "ua1" paths on server 2, and equal file counts between each
 * replicated minute directory and the matching nested "ua3/&lt;colo&gt;" source directory.
 */
@Test(enabled = true)
public void normalTest1Source2TargetPartitionedSourceTarget() throws Exception {
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);

    final String firstTargetName = Util.readEntityName(bundles[0].getClusters().get(0));
    final String secondTargetName = Util.readEntityName(bundles[1].getClusters().get(0));
    final String sourceName = Util.readEntityName(bundles[2].getClusters().get(0));
    final String retention = "days(1000000)";
    final String coloPartition = "${cluster.colo}";

    final FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.setFilePath(testBaseDir1 + MINUTE_DATE_PATTERN);
    feed.clearFeedClusters();

    // target on server 1
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(firstTargetName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:05Z", "2099-10-01T12:10Z")
        .withClusterType(ClusterType.TARGET)
        .withPartition(coloPartition)
        .withDataLocation(testBaseDir1 + "/ua1" + MINUTE_DATE_PATTERN + "/")
        .build());

    // target on server 2
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(secondTargetName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:10Z", "2099-10-01T12:25Z")
        .withClusterType(ClusterType.TARGET)
        .withPartition(coloPartition)
        .withDataLocation(testBaseDir1 + "/ua2" + MINUTE_DATE_PATTERN + "/")
        .build());

    // source on server 3
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(sourceName)
        .withRetention(retention, ActionType.DELETE)
        .withValidity("2012-10-01T12:00Z", "2099-01-01T00:00Z")
        .withClusterType(ClusterType.SOURCE)
        .withPartition(coloPartition)
        .withDataLocation(testBaseDir4 + MINUTE_DATE_PATTERN + "/")
        .build());

    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));

    final ServiceResponse submitResponse = prism.getFeedHelper().submitEntity(feed.toString());
    TimeUtil.sleepSeconds(10);
    AssertUtil.assertSucceeded(submitResponse);
    AssertUtil.assertSucceeded(prism.getFeedHelper().schedule(feed.toString()));
    TimeUtil.sleepSeconds(15);

    InstanceUtil.waitTillInstanceReachState(cluster1OC, feed.getName(), 1,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED, 20);
    InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 2,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED, 20);

    final String server1Root = testBaseDir1 + "/ua1" + testDate;
    final String server2Root = testBaseDir1 + "/ua2" + testDate;

    // server 1 must not contain anything of ua2
    final List<Path> allOnServer1 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster1FS, new Path(server1Root));
    AssertUtil.failIfStringFoundInPath(allOnServer1, "ua2");

    // server 2 must not contain anything of ua1
    final List<Path> allOnServer2 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(server2Root));
    AssertUtil.failIfStringFoundInPath(allOnServer2, "ua1");

    // replicated data per minute directory
    final List<Path> server1Minute05 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster1FS, new Path(server1Root + "05/"));
    final List<Path> server1Minute10 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster1FS, new Path(server1Root + "10/"));
    final List<Path> server2Minute10 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(server2Root + "10"));
    final List<Path> server2Minute15 =
        HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(server2Root + "15"));

    // original data on the source, nested as <minute>/ua3/<target colo>
    final List<Path> sourceMinute05Ua1 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster3FS, new Path(testDirWithDateSourceTarget + "05/ua3/ua1"));
    final List<Path> sourceMinute10Ua1 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster3FS, new Path(testDirWithDateSourceTarget + "10/ua3/ua1"));
    final List<Path> sourceMinute10Ua2 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster3FS, new Path(testDirWithDateSourceTarget + "10/ua3/ua2"));
    final List<Path> sourceMinute15Ua2 = HadoopUtil.getAllFilesRecursivelyHDFS(
        cluster3FS, new Path(testDirWithDateSourceTarget + "15/ua3/ua2"));

    // number of replicated files should be the same as on the source
    AssertUtil.checkForListSizes(server1Minute05, sourceMinute05Ua1);
    AssertUtil.checkForListSizes(server1Minute10, sourceMinute10Ua1);
    AssertUtil.checkForListSizes(server2Minute10, sourceMinute10Ua2);
    AssertUtil.checkForListSizes(server2Minute15, sourceMinute15Ua2);
}
/**
 * Negative test: the cluster of server 3 is added to the feed twice (once as target, once as
 * source) with different validities. The submission is expected to fail and the response
 * message is expected to contain "is defined more than once for feed".
 */
@Test(enabled = true, groups = "embedded")
public void moreThanOneClusterWithSameNameDiffValidity() throws Exception {
    Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);
    String startTimeUA1 = "2012-10-01T12:05Z";
    String startTimeUA2 = "2012-10-01T12:10Z";
    FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    feed.clearFeedClusters();

    // source on server 1
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
            .withRetention("days(1000000)", ActionType.DELETE)
            .withValidity(startTimeUA1, "2012-10-01T12:10Z")
            .withClusterType(ClusterType.SOURCE)
            .withPartition("")
            .withDataLocation(testBaseDir1 + MINUTE_DATE_PATTERN)
            .build());

    // first occurrence of the server 3 cluster: target
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("days(1000000)", ActionType.DELETE)
            .withValidity(startTimeUA2, "2012-10-01T12:25Z")
            .withClusterType(ClusterType.TARGET)
            .withPartition("")
            .withDataLocation(testBaseDir2 + MINUTE_DATE_PATTERN)
            .build());

    // second occurrence of the same cluster name (deliberate duplicate): source
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
            .withRetention("days(1000000)", ActionType.DELETE)
            .withValidity("2012-10-01T12:00Z", "2099-01-01T00:00Z")
            .withClusterType(ClusterType.SOURCE)
            .withPartition("")
            .build());

    LOGGER.info("feed: " + Util.prettyPrintXml(feed.toString()));
    ServiceResponse r = prism.getFeedHelper().submitEntity(feed.toString());
    TimeUtil.sleepSeconds(10);

    // The second argument is the assertion failure message, so it describes the expectation
    // rather than repeating the server's error text.
    AssertUtil.assertFailed(r,
        "Submission of feed should have failed as a cluster is defined more than once.");
    Assert.assertTrue(r.getMessage().contains("is defined more than once for feed"),
        "Failed response has unexpected error message.");
}
}