/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.falcon.regression;

import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.feed.ActionType;
import org.apache.falcon.entity.v0.feed.ClusterType;
import org.apache.falcon.regression.Entities.FeedMerlin;
import org.apache.falcon.regression.core.bundle.Bundle;
import org.apache.falcon.regression.core.enumsAndConstants.MerlinConstants;
import org.apache.falcon.regression.core.helpers.ColoHelper;
import org.apache.falcon.regression.core.supportClasses.ExecResult;
import org.apache.falcon.regression.core.util.AssertUtil;
import org.apache.falcon.regression.core.util.BundleUtil;
import org.apache.falcon.regression.core.util.HadoopUtil;
import org.apache.falcon.regression.core.util.InstanceUtil;
import org.apache.falcon.regression.core.util.OSUtil;
import org.apache.falcon.regression.core.util.OozieUtil;
import org.apache.falcon.regression.core.util.TimeUtil;
import org.apache.falcon.regression.core.util.Util;
import org.apache.falcon.regression.testHelper.BaseTestClass;
import org.apache.falcon.resource.InstancesResult;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.apache.oozie.client.CoordinatorAction;
import org.apache.oozie.client.OozieClient;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import javax.xml.bind.JAXBException;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* Feed replication test.
* Replicates empty directories as well as directories containing data.
*/
@Test(groups = { "distributed", "embedded", "sanity", "multiCluster" })
public class FeedReplicationTest extends BaseTestClass {
private ColoHelper cluster1 = servers.get(0);
private ColoHelper cluster2 = servers.get(1);
private ColoHelper cluster3 = servers.get(2);
private FileSystem cluster1FS = serverFS.get(0);
private FileSystem cluster2FS = serverFS.get(1);
private FileSystem cluster3FS = serverFS.get(2);
private OozieClient cluster2OC = serverOC.get(1);
private OozieClient cluster3OC = serverOC.get(2);
private String baseTestDir = cleanAndGetTestDir();
private String sourcePath = baseTestDir + "/source";
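//MINUTE_DATE_PATTERN appends a year/month/day/hour/minute suffix, so each feed instance maps to a minute-granular HDFS directory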
private String feedDataLocation = baseTestDir + "/source" + MINUTE_DATE_PATTERN;
private String targetPath = baseTestDir + "/target";
private String targetDataLocation = targetPath + MINUTE_DATE_PATTERN;
private static final Logger LOGGER = Logger.getLogger(FeedReplicationTest.class);

@BeforeMethod(alwaysRun = true)
public void setUp() throws JAXBException, IOException {
Bundle bundle = BundleUtil.readFeedReplicationBundle();
bundles[0] = new Bundle(bundle, cluster1);
bundles[1] = new Bundle(bundle, cluster2);
bundles[2] = new Bundle(bundle, cluster3);
bundles[0].generateUniqueBundle(this);
bundles[1].generateUniqueBundle(this);
bundles[2].generateUniqueBundle(this);
}

@AfterMethod(alwaysRun = true)
public void tearDown() throws IOException {
removeTestClassEntities();
cleanTestsDirs();
}

/**
* Test demonstrates replication of stored data from one source cluster to one target cluster.
* It checks the lifecycle of the replication workflow instance, including its creation. Once
* replication completes, the test checks whether the data was replicated correctly.
* Also checks for the presence of the _SUCCESS file in the target directory.
*/
@Test(dataProvider = "dataFlagProvider")
public void replicate1Source1Target(boolean dataFlag)
throws Exception {
Bundle.submitCluster(bundles[0], bundles[1]);
String startTime = TimeUtil.getTimeWrtSystemTime(0);
String endTime = TimeUtil.addMinsToTime(startTime, 5);
LOGGER.info("Time range between : " + startTime + " and " + endTime);
//configure feed
FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
feed.setFilePath(feedDataLocation);
//erase all clusters from feed definition
feed.clearFeedClusters();
//set cluster1 as source
feed.addFeedCluster(
new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
.withRetention("days(1000000)", ActionType.DELETE)
.withValidity(startTime, endTime)
.withClusterType(ClusterType.SOURCE)
.build());
//set cluster2 as target
feed.addFeedCluster(
new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
.withRetention("days(1000000)", ActionType.DELETE)
.withValidity(startTime, endTime)
.withClusterType(ClusterType.TARGET)
.withDataLocation(targetDataLocation)
.build());
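//job.counter presumably enables job counters on the replication workflow; the CLI metrics assertion at the end of the test depends on them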
feed.withProperty("job.counter", "true");
//submit and schedule feed
LOGGER.info("Feed : " + Util.prettyPrintXml(feed.toString()));
AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed.toString()));
//upload necessary data
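//the formatter below mirrors MINUTE_DATE_PATTERN, so the uploaded data lands in the directory of the first feed instance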
DateTime date = new DateTime(startTime, DateTimeZone.UTC);
DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy'/'MM'/'dd'/'HH'/'mm");
String timePattern = fmt.print(date);
String sourceLocation = sourcePath + "/" + timePattern + "/";
String targetLocation = targetPath + "/" + timePattern + "/";
HadoopUtil.recreateDir(cluster1FS, sourceLocation);
Path toSource = new Path(sourceLocation);
Path toTarget = new Path(targetLocation);
if (dataFlag) {
HadoopUtil.copyDataToFolder(cluster1FS, sourceLocation,
OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile.xml"));
HadoopUtil.copyDataToFolder(cluster1FS, sourceLocation,
OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile1.txt"));
}
//check if coordinator exists
InstanceUtil.waitTillInstancesAreCreated(cluster2OC, feed.toString(), 0);
Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "REPLICATION"), 1);
//replication should start, wait while it ends
InstanceUtil.waitTillInstanceReachState(cluster2OC, Util.readEntityName(feed.toString()), 1,
CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);
//check if data has been replicated correctly
List<Path> cluster1ReplicatedData = HadoopUtil
.getAllFilesRecursivelyHDFS(cluster1FS, toSource);
List<Path> cluster2ReplicatedData = HadoopUtil
.getAllFilesRecursivelyHDFS(cluster2FS, toTarget);
AssertUtil.checkForListSizes(cluster1ReplicatedData, cluster2ReplicatedData);
//_SUCCESS does not exist in source
Assert.assertFalse(HadoopUtil.getSuccessFolder(cluster1FS, toSource, ""));
//_SUCCESS should exist in target
Assert.assertTrue(HadoopUtil.getSuccessFolder(cluster2FS, toTarget, ""));
if (!MerlinConstants.IS_SECURE) {
AssertUtil.assertLogMoverPath(true, Util.readEntityName(feed.toString()),
cluster2FS, "feed", "Success logs are not present");
}
ExecResult execResult = cluster1.getFeedHelper().getCLIMetrics(feed.getName());
AssertUtil.assertCLIMetrics(execResult, feed.getName(), 1, dataFlag);
}

/**
* Test demonstrates replication of stored data from one source cluster to two target clusters.
* It checks the lifecycle of the replication workflow instances, including their creation on
* both targets. Once replication completes, the test checks whether the data was replicated
* correctly. Also checks for the presence of the _SUCCESS file in the target directories.
*/
@Test(dataProvider = "dataFlagProvider")
public void replicate1Source2Targets(boolean dataFlag) throws Exception {
Bundle.submitCluster(bundles[0], bundles[1], bundles[2]);
String startTime = TimeUtil.getTimeWrtSystemTime(0);
String endTime = TimeUtil.addMinsToTime(startTime, 5);
LOGGER.info("Time range between : " + startTime + " and " + endTime);
//configure feed
FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
feed.setFilePath(feedDataLocation);
//erase all clusters from feed definition
feed.clearFeedClusters();
//set cluster1 as source
feed.addFeedCluster(
new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
.withRetention("days(1000000)", ActionType.DELETE)
.withValidity(startTime, endTime)
.withClusterType(ClusterType.SOURCE)
.build());
//set cluster2 as target
feed.addFeedCluster(
new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
.withRetention("days(1000000)", ActionType.DELETE)
.withValidity(startTime, endTime)
.withClusterType(ClusterType.TARGET)
.withDataLocation(targetDataLocation)
.build());
//set cluster3 as target
feed.addFeedCluster(
new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[2].getClusters().get(0)))
.withRetention("days(1000000)", ActionType.DELETE)
.withValidity(startTime, endTime)
.withClusterType(ClusterType.TARGET)
.withDataLocation(targetDataLocation)
.build());
feed.withProperty("job.counter", "true");
//submit and schedule feed
LOGGER.info("Feed : " + Util.prettyPrintXml(feed.toString()));
AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed.toString()));
//upload necessary data
DateTime date = new DateTime(startTime, DateTimeZone.UTC);
DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy'/'MM'/'dd'/'HH'/'mm");
String timePattern = fmt.print(date);
String sourceLocation = sourcePath + "/" + timePattern + "/";
String targetLocation = targetPath + "/" + timePattern + "/";
HadoopUtil.recreateDir(cluster1FS, sourceLocation);
Path toSource = new Path(sourceLocation);
Path toTarget = new Path(targetLocation);
if (dataFlag) {
HadoopUtil.copyDataToFolder(cluster1FS, sourceLocation,
OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile.xml"));
HadoopUtil.copyDataToFolder(cluster1FS, sourceLocation,
OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile1.txt"));
}
//check if all coordinators exist
InstanceUtil.waitTillInstancesAreCreated(cluster2OC, feed.toString(), 0);
InstanceUtil.waitTillInstancesAreCreated(cluster3OC, feed.toString(), 0);
Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "REPLICATION"), 1);
Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster3OC, feed.getName(), "REPLICATION"), 1);
//replication on cluster 2 should start, wait till it ends
InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 1,
CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);
//replication on cluster 3 should start, wait till it ends
InstanceUtil.waitTillInstanceReachState(cluster3OC, feed.getName(), 1,
CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);
//check if data has been replicated correctly
List<Path> cluster1ReplicatedData = HadoopUtil
.getAllFilesRecursivelyHDFS(cluster1FS, toSource);
List<Path> cluster2ReplicatedData = HadoopUtil
.getAllFilesRecursivelyHDFS(cluster2FS, toTarget);
List<Path> cluster3ReplicatedData = HadoopUtil
.getAllFilesRecursivelyHDFS(cluster3FS, toTarget);
AssertUtil.checkForListSizes(cluster1ReplicatedData, cluster2ReplicatedData);
AssertUtil.checkForListSizes(cluster1ReplicatedData, cluster3ReplicatedData);
//_SUCCESS does not exist in source
Assert.assertFalse(HadoopUtil.getSuccessFolder(cluster1FS, toSource, ""));
//_SUCCESS should exist in target
Assert.assertTrue(HadoopUtil.getSuccessFolder(cluster2FS, toTarget, ""));
Assert.assertTrue(HadoopUtil.getSuccessFolder(cluster3FS, toTarget, ""));
if (!MerlinConstants.IS_SECURE) {
AssertUtil.assertLogMoverPath(true, Util.readEntityName(feed.toString()),
cluster2FS, "feed", "Success logs are not present");
}
ExecResult execResult = cluster1.getFeedHelper().getCLIMetrics(feed.getName());
AssertUtil.assertCLIMetrics(execResult, feed.getName(), 1, dataFlag);
}

/**
* Test demonstrates how replication depends on the availability flag. The scenario includes one
* source and one target cluster. Even after the feed is submitted and scheduled and data is
* available, the feed still waits for the availability flag (a file whose name is defined as the
* availability flag in the feed definition). As soon as that file is uploaded to the data
* directory, replication starts; when it ends, the test checks whether the data was replicated
* correctly. Also checks for the presence of the availability flag in the target directory.
*/
@Test(dataProvider = "dataFlagProvider")
public void availabilityFlagTest(boolean dataFlag) throws Exception {
//replicate1Source1Target scenario + set availability flag but don't upload required file
Bundle.submitCluster(bundles[0], bundles[1]);
String startTime = TimeUtil.getTimeWrtSystemTime(0);
String endTime = TimeUtil.addMinsToTime(startTime, 5);
LOGGER.info("Time range between : " + startTime + " and " + endTime);
//configure feed
String availabilityFlagName = "availabilityFlag.txt";
String feedName = Util.readEntityName(bundles[0].getDataSets().get(0));
FeedMerlin feedElement = bundles[0].getFeedElement(feedName);
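//with an availability flag set, an instance's input should be considered available only once a file with this name appears in the data directory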
feedElement.setAvailabilityFlag(availabilityFlagName);
bundles[0].writeFeedElement(feedElement, feedName);
FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
feed.setFilePath(feedDataLocation);
//erase all clusters from feed definition
feed.clearFeedClusters();
//set cluster1 as source
feed.addFeedCluster(
new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
.withRetention("days(1000000)", ActionType.DELETE)
.withValidity(startTime, endTime)
.withClusterType(ClusterType.SOURCE)
.build());
//set cluster2 as target
feed.addFeedCluster(
new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
.withRetention("days(1000000)", ActionType.DELETE)
.withValidity(startTime, endTime)
.withClusterType(ClusterType.TARGET)
.withDataLocation(targetDataLocation)
.build());
feed.withProperty("job.counter", "true");
//submit and schedule feed
LOGGER.info("Feed : " + Util.prettyPrintXml(feed.toString()));
AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed.toString()));
//upload necessary data
DateTime date = new DateTime(startTime, DateTimeZone.UTC);
DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy'/'MM'/'dd'/'HH'/'mm");
String timePattern = fmt.print(date);
String sourceLocation = sourcePath + "/" + timePattern + "/";
String targetLocation = targetPath + "/" + timePattern + "/";
HadoopUtil.recreateDir(cluster1FS, sourceLocation);
Path toSource = new Path(sourceLocation);
Path toTarget = new Path(targetLocation);
if (dataFlag) {
HadoopUtil.copyDataToFolder(cluster1FS, sourceLocation,
OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile.xml"));
HadoopUtil.copyDataToFolder(cluster1FS, sourceLocation,
OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile1.txt"));
}
//wait till the instance is created
InstanceUtil.waitTillInstancesAreCreated(cluster2OC, feed.toString(), 0);
//check if coordinator exists
Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster2OC, feedName, "REPLICATION"), 1);
//replication should not start since the availability flag is missing
TimeUtil.sleepSeconds(60);
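//expect a single instance that is still waiting (assuming validateResponse checks total, running, suspended, waiting and killed counts in that order)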
InstancesResult r = prism.getFeedHelper().getProcessInstanceStatus(feedName,
"?start=" + startTime + "&end=" + endTime);
InstanceUtil.validateResponse(r, 1, 0, 0, 1, 0);
LOGGER.info("Replication didn't start.");
//create availability flag on source
HadoopUtil.copyDataToFolder(cluster1FS, sourceLocation, OSUtil.concat(OSUtil.RESOURCES, availabilityFlagName));
//wait till the instance becomes RUNNING
InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 1,
CoordinatorAction.Status.RUNNING, EntityType.FEED);
//wait till the instance succeeds
InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 1,
CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);
//check if data was replicated correctly
List<Path> cluster1ReplicatedData = HadoopUtil
.getAllFilesRecursivelyHDFS(cluster1FS, toSource);
LOGGER.info("Data on source cluster: " + cluster1ReplicatedData);
List<Path> cluster2ReplicatedData = HadoopUtil
.getAllFilesRecursivelyHDFS(cluster2FS, toTarget);
LOGGER.info("Data on target cluster: " + cluster2ReplicatedData);
AssertUtil.checkForListSizes(cluster1ReplicatedData, cluster2ReplicatedData);
//availability flag should exist in source
Assert.assertTrue(HadoopUtil.getSuccessFolder(cluster1FS, toSource, availabilityFlagName));
//availability flag should exist in target
Assert.assertTrue(HadoopUtil.getSuccessFolder(cluster2FS, toTarget, availabilityFlagName));
if (!MerlinConstants.IS_SECURE) {
AssertUtil.assertLogMoverPath(true, Util.readEntityName(feed.toString()),
cluster2FS, "feed", "Success logs are not present");
}
ExecResult execResult = cluster1.getFeedHelper().getCLIMetrics(feed.getName());
AssertUtil.assertCLIMetrics(execResult, feed.getName(), 1, dataFlag);
}

/**
* Test for https://issues.apache.org/jira/browse/FALCON-668.
* Check that new DistCp options are allowed.
*/
@Test
public void testNewDistCpOptions() throws Exception {
Bundle.submitCluster(bundles[0], bundles[1]);
String startTime = TimeUtil.getTimeWrtSystemTime(0);
String endTime = TimeUtil.addMinsToTime(startTime, 5);
LOGGER.info("Time range between : " + startTime + " and " + endTime);
//configure feed
String feedName = Util.readEntityName(bundles[0].getDataSets().get(0));
FeedMerlin feedElement = bundles[0].getFeedElement(feedName);
bundles[0].writeFeedElement(feedElement, feedName);
FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
feed.setFilePath(feedDataLocation);
//erase all clusters from feed definition
feed.clearFeedClusters();
//set cluster1 as source
feed.addFeedCluster(
new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
.withRetention("days(1000000)", ActionType.DELETE)
.withValidity(startTime, endTime)
.withClusterType(ClusterType.SOURCE)
.build());
//set cluster2 as target
feed.addFeedCluster(
new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
.withRetention("days(1000000)", ActionType.DELETE)
.withValidity(startTime, endTime)
.withClusterType(ClusterType.TARGET)
.withDataLocation(targetDataLocation)
.build());
feed.withProperty("job.counter", "true");
//add custom properties to feed
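//per FALCON-668 these feed properties should be forwarded to the replication workflow as DistCp options (e.g. removeDeletedFiles corresponds to DistCp's delete-missing behaviour)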
HashMap<String, String> propMap = new HashMap<>();
propMap.put("overwrite", "true");
propMap.put("ignoreErrors", "false");
propMap.put("skipChecksum", "false");
propMap.put("removeDeletedFiles", "true");
propMap.put("preserveBlockSize", "true");
propMap.put("preserveReplicationNumber", "true");
propMap.put("preservePermission", "true");
for (Map.Entry<String, String> entry : propMap.entrySet()) {
feed.withProperty(entry.getKey(), entry.getValue());
}
//add custom property which shouldn't be passed to workflow
HashMap<String, String> unsupportedPropMap = new HashMap<>();
unsupportedPropMap.put("myCustomProperty", "true");
feed.withProperty("myCustomProperty", "true");
//upload necessary data to source
DateTime date = new DateTime(startTime, DateTimeZone.UTC);
DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy'/'MM'/'dd'/'HH'/'mm");
String timePattern = fmt.print(date);
String sourceLocation = sourcePath + "/" + timePattern + "/";
HadoopUtil.recreateDir(cluster1FS, sourceLocation);
HadoopUtil.copyDataToFolder(cluster1FS, sourceLocation, OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile.xml"));
HadoopUtil.copyDataToFolder(cluster1FS, sourceLocation, OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile1.txt"));
//copy 2 files to target to check if they will be deleted because of removeDeletedFiles property
String targetLocation = targetPath + "/" + timePattern + "/";
cluster2FS.copyFromLocalFile(new Path(OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile3.txt")),
new Path(targetLocation + "dataFile3.txt"));
//submit and schedule feed
LOGGER.info("Feed : " + Util.prettyPrintXml(feed.toString()));
AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed.toString()));
//wait till the instance is created
InstanceUtil.waitTillInstancesAreCreated(cluster2OC, feed.toString(), 0);
//check if coordinator exists and replication starts
Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "REPLICATION"), 1);
InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 1,
CoordinatorAction.Status.RUNNING, EntityType.FEED);
//check that properties were passed to workflow definition
String bundleId = OozieUtil.getLatestBundleID(cluster2OC, feedName, EntityType.FEED);
String coordId = OozieUtil.getReplicationCoordID(bundleId, cluster2.getFeedHelper()).get(0);
CoordinatorAction coordinatorAction = cluster2OC.getCoordJobInfo(coordId).getActions().get(0);
String wfDefinition = cluster2OC.getJobDefinition(coordinatorAction.getExternalId());
LOGGER.info(String.format("Definition of coordinator job action %s : \n %s \n",
coordinatorAction.getExternalId(), Util.prettyPrintXml(wfDefinition)));
Assert.assertTrue(OozieUtil.propsArePresentInWorkflow(wfDefinition, "replication", propMap),
"New distCp supported properties should be passed to replication args list.");
Assert.assertFalse(OozieUtil.propsArePresentInWorkflow(wfDefinition, "replication", unsupportedPropMap),
"Unsupported properties shouldn't be passed to replication args list.");
//check that replication succeeds
InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 1,
CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);
List<Path> finalFiles = HadoopUtil.getAllFilesRecursivelyHDFS(cluster2FS, new Path(targetPath));
Assert.assertEquals(finalFiles.size(), 2, "Only replicated files should be present on target "
+ "because of 'removeDeletedFiles' distCp property.");
ExecResult execResult = cluster1.getFeedHelper().getCLIMetrics(feed.getName());
AssertUtil.assertCLIMetrics(execResult, feed.getName(), 1, true);
}

/**
* Test demonstrates failure of replication of stored data from one source cluster to one target
* cluster. When the replication job fails, the test checks whether failed logs are present in
* the staging directory.
*/
@Test
public void replicate1Source1TargetFail()
throws Exception {
Bundle.submitCluster(bundles[0], bundles[1]);
String startTime = TimeUtil.getTimeWrtSystemTime(0);
String endTime = TimeUtil.addMinsToTime(startTime, 5);
LOGGER.info("Time range between : " + startTime + " and " + endTime);
//configure feed
FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
feed.setFilePath(feedDataLocation);
//erase all clusters from feed definition
feed.clearFeedClusters();
//set cluster1 as source
feed.addFeedCluster(
new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
.withRetention("days(1000000)", ActionType.DELETE)
.withValidity(startTime, endTime)
.withClusterType(ClusterType.SOURCE)
.build());
//set cluster2 as target
feed.addFeedCluster(
new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[1].getClusters().get(0)))
.withRetention("days(1000000)", ActionType.DELETE)
.withValidity(startTime, endTime)
.withClusterType(ClusterType.TARGET)
.withDataLocation(targetDataLocation)
.build());
//submit and schedule feed
LOGGER.info("Feed : " + Util.prettyPrintXml(feed.toString()));
AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed.toString()));
//upload necessary data
DateTime date = new DateTime(startTime, DateTimeZone.UTC);
DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy'/'MM'/'dd'/'HH'/'mm");
String timePattern = fmt.print(date);
String sourceLocation = sourcePath + "/" + timePattern + "/";
String targetLocation = targetPath + "/" + timePattern + "/";
HadoopUtil.recreateDir(cluster1FS, sourceLocation);
Path toSource = new Path(sourceLocation);
Path toTarget = new Path(targetLocation);
HadoopUtil.copyDataToFolder(cluster1FS, sourceLocation, OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile.xml"));
HadoopUtil.copyDataToFolder(cluster1FS, sourceLocation, OSUtil.concat(OSUtil.NORMAL_INPUT, "dataFile1.txt"));
//check if coordinator exists
InstanceUtil.waitTillInstancesAreCreated(cluster2OC, feed.toString(), 0);
Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(cluster2OC, feed.getName(), "REPLICATION"), 1);
//wait till the instance becomes RUNNING
InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 1,
CoordinatorAction.Status.RUNNING, EntityType.FEED);
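//deleting the source data while replication is running should make the DistCp job, and hence the instance, fail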
HadoopUtil.deleteDirIfExists(sourceLocation, cluster1FS);
//wait till the instance gets killed
InstanceUtil.waitTillInstanceReachState(cluster2OC, feed.getName(), 1,
CoordinatorAction.Status.KILLED, EntityType.FEED);
if (!MerlinConstants.IS_SECURE) {
AssertUtil.assertLogMoverPath(false, Util.readEntityName(feed.toString()),
cluster2FS, "feed", "Failed logs are not present");
}
}

/* Flag value denotes whether to add data for replication or not.
* flag=true : add data for replication.
* flag=false : let empty directories be replicated.
*/
@DataProvider(name = "dataFlagProvider")
private Object[][] dataFlagProvider() {
return new Object[][] {
new Object[] {true, },
new Object[] {false, },
};
}
}