/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.falcon.entity;
import org.apache.falcon.FalconException;
import org.apache.falcon.entity.parser.EntityParserFactory;
import org.apache.falcon.entity.parser.FeedEntityParser;
import org.apache.falcon.entity.store.ConfigurationStore;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.Frequency;
import org.apache.falcon.entity.v0.SchemaHelper;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.cluster.Properties;
import org.apache.falcon.entity.v0.cluster.Property;
import org.apache.falcon.entity.v0.feed.Argument;
import org.apache.falcon.entity.v0.feed.Arguments;
import org.apache.falcon.entity.v0.feed.ClusterType;
import org.apache.falcon.entity.v0.feed.Clusters;
import org.apache.falcon.entity.v0.feed.Extract;
import org.apache.falcon.entity.v0.feed.ExtractMethod;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.FieldIncludeExclude;
import org.apache.falcon.entity.v0.feed.FieldsType;
import org.apache.falcon.entity.v0.feed.Import;
import org.apache.falcon.entity.v0.feed.Lifecycle;
import org.apache.falcon.entity.v0.feed.Location;
import org.apache.falcon.entity.v0.feed.LocationType;
import org.apache.falcon.entity.v0.feed.Locations;
import org.apache.falcon.entity.v0.feed.MergeType;
import org.apache.falcon.entity.v0.feed.RetentionStage;
import org.apache.falcon.entity.v0.feed.Datasource;
import org.apache.falcon.entity.v0.feed.Validity;
import org.apache.falcon.entity.v0.process.Input;
import org.apache.falcon.entity.v0.process.Inputs;
import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Outputs;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.resource.SchedulableEntityInstance;
import org.apache.falcon.service.LifecyclePolicyMap;
import org.apache.falcon.util.DateUtil;
import org.apache.hadoop.fs.Path;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TimeZone;
/**
* Test for feed helper methods.
*/
public class FeedHelperTest extends AbstractTestBase {
// UTC timezone constant used when interpreting instance times in these tests.
private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
// Falcon configuration store shared by the tests; re-fetched in setUp() after cleanup.
private ConfigurationStore store;
@BeforeClass
public void init() throws Exception {
    // One-time suite setup: bring up the config store, then the lifecycle policy map.
    initConfigStore();
    LifecyclePolicyMap.get().init();
}
@BeforeMethod
public void setUp() throws Exception {
    // Each test starts from an empty store and a fresh store handle.
    cleanupStore();
    store = getStore();
}
@Test
public void testPartitionExpression() {
    // Normalization trims whitespace and redundant slashes, joins non-empty parts,
    // and collapses all-null input to the empty string.
    final String both = FeedHelper.normalizePartitionExpression(" /a// ", " /b// ");
    Assert.assertEquals(both, "a/b");
    final String secondOnly = FeedHelper.normalizePartitionExpression(null, " /b// ");
    Assert.assertEquals(secondOnly, "b");
    final String neither = FeedHelper.normalizePartitionExpression(null, null);
    Assert.assertEquals(neither, "");
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testInstanceBeforeStart() throws Exception {
    // Asking for a producer of an instance earlier than the feed's validity start must throw.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Process process = prepareProcess(cluster, "minutes(10)", "2012-02-28 10:37 UTC", "2012-02-28 10:47 UTC");

    Output output = new Output();
    output.setName("outputFeed");
    output.setFeed(feed.getName());
    output.setInstance("now(0,0)");
    Outputs processOutputs = new Outputs();
    processOutputs.getOutputs().add(output);
    process.setOutputs(processOutputs);
    store.publish(EntityType.PROCESS, process);

    // 2011-02-27 is before the feed's 2011-02-28 start.
    FeedHelper.getProducerInstance(feed, getDate("2011-02-27 10:00 UTC"), cluster);
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testInstanceEqualsEnd() throws Exception {
    // An instance time equal to the feed's validity end is rejected.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Process process = prepareProcess(cluster, "minutes(10)", "2012-02-28 10:37 UTC", "2012-02-28 10:47 UTC");

    Output output = new Output();
    output.setName("outputFeed");
    output.setFeed(feed.getName());
    output.setInstance("now(0,0)");
    Outputs processOutputs = new Outputs();
    processOutputs.getOutputs().add(output);
    process.setOutputs(processOutputs);
    store.publish(EntityType.PROCESS, process);

    // Exactly the feed end time.
    FeedHelper.getProducerInstance(feed, getDate("2016-02-28 10:00 UTC"), cluster);
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testInstanceOutOfSync() throws Exception {
    // An instance time not aligned to the feed's 5-minute boundary is rejected.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Process process = prepareProcess(cluster, "minutes(10)", "2012-02-28 10:37 UTC", "2012-02-28 10:47 UTC");

    Output output = new Output();
    output.setName("outputFeed");
    output.setFeed(feed.getName());
    output.setInstance("now(0,0)");
    Outputs processOutputs = new Outputs();
    processOutputs.getOutputs().add(output);
    process.setOutputs(processOutputs);
    store.publish(EntityType.PROCESS, process);

    // 09:04 does not fall on a minutes(5) boundary relative to the feed start.
    FeedHelper.getProducerInstance(feed, getDate("2016-02-28 09:04 UTC"), cluster);
}
@Test
public void testInvalidProducerInstance() throws Exception {
    // No process instance maps onto this feed instance, so the lookup yields null.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Process process = prepareProcess(cluster, "minutes(10)", "2012-02-28 10:37 UTC", "2012-02-28 10:47 UTC");

    Output output = new Output();
    output.setName("outputFeed");
    output.setFeed(feed.getName());
    output.setInstance("now(0,0)");
    Outputs processOutputs = new Outputs();
    processOutputs.getOutputs().add(output);
    process.setOutputs(processOutputs);
    store.publish(EntityType.PROCESS, process);

    Assert.assertNull(FeedHelper.getProducerInstance(feed, getDate("2012-02-28 10:40 UTC"), cluster));
}
@Test
public void testGetProducerOutOfValidity() throws FalconException, ParseException {
    // The process is found as the feed's producer, but the requested feed instance
    // falls outside the process validity window, so no producer instance exists.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Process process = prepareProcess(cluster, "minutes(10)", "2012-02-28 10:37 UTC", "2012-02-28 10:47 UTC");

    Output output = new Output();
    output.setName("outputFeed");
    output.setFeed(feed.getName());
    output.setInstance("now(0,0)");
    Outputs processOutputs = new Outputs();
    processOutputs.getOutputs().add(output);
    process.setOutputs(processOutputs);
    store.publish(EntityType.PROCESS, process);

    Assert.assertEquals(FeedHelper.getProducerProcess(feed).getName(), process.getName());
    SchedulableEntityInstance producer =
            FeedHelper.getProducerInstance(feed, getDate("2012-02-28 10:45 UTC"), cluster);
    Assert.assertNull(producer);
}
@Test
public void testGetConsumersOutOfValidity() throws Exception {
    // A feed instance outside the consumer process's validity yields no consumers.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Process process = prepareProcess(cluster, "minutes(10)", "2012-02-28 10:37 UTC", "2012-02-28 10:47 UTC");

    Input input = new Input();
    input.setName("inputFeed");
    input.setFeed(feed.getName());
    input.setStart("now(0, -20)");
    input.setEnd("now(0, 0)");
    Inputs processInputs = new Inputs();
    processInputs.getInputs().add(input);
    process.setInputs(processInputs);
    store.publish(EntityType.PROCESS, process);

    Set<SchedulableEntityInstance> consumers =
            FeedHelper.getConsumerInstances(feed, getDate("2016-02-28 09:00 UTC"), cluster);
    Assert.assertTrue(consumers.isEmpty());
}
@Test
public void testGetFeedValidityStartAndNextInstance() throws Exception {
    // Validity start is the cluster's configured start; the next instance is one
    // frequency interval (5 minutes) later.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");

    Date start = FeedHelper.getFeedValidityStart(feed, cluster.getName());
    Assert.assertEquals(DateUtil.getDateFormatFromTime(start.getTime()), "2011-02-28T10:00Z");

    Date next = FeedHelper.getNextFeedInstanceDate(start, feed);
    Assert.assertEquals(DateUtil.getDateFormatFromTime(next.getTime()), "2011-02-28T10:05Z");
}
@Test
public void testGetConsumersFirstInstance() throws Exception {
    // A feed instance consumed only by the very first process instance.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Process process = prepareProcess(cluster, "minutes(10)", "2012-02-28 10:37 UTC", "2012-02-28 10:47 UTC");

    Input input = new Input();
    input.setName("inputFeed");
    input.setFeed(feed.getName());
    input.setStart("now(0, -20)");
    input.setEnd("now(0, 0)");
    Inputs processInputs = new Inputs();
    processInputs.getInputs().add(input);
    process.setInputs(processInputs);
    store.publish(EntityType.PROCESS, process);

    Set<SchedulableEntityInstance> result =
            FeedHelper.getConsumerInstances(feed, getDate("2012-02-28 10:15 UTC"), cluster);

    SchedulableEntityInstance consumer = new SchedulableEntityInstance(process.getName(), cluster.getName(),
            getDate("2012-02-28 10:37 UTC"), EntityType.PROCESS);
    consumer.setTags(SchedulableEntityInstance.INPUT);
    Set<SchedulableEntityInstance> expected = new HashSet<>();
    expected.add(consumer);
    Assert.assertEquals(result, expected);
}
@Test
public void testGetConsumersLastInstance() throws Exception {
    // A feed instance read by two consecutive process instances near the process start.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Process process = prepareProcess(cluster, "minutes(10)", "2012-02-28 10:20 UTC", "2016-02-28 10:00 UTC");

    Input input = new Input();
    input.setName("inputFeed");
    input.setFeed(feed.getName());
    input.setStart("now(0, -20)");
    input.setEnd("now(0, 0)");
    Inputs processInputs = new Inputs();
    processInputs.getInputs().add(input);
    process.setInputs(processInputs);
    store.publish(EntityType.PROCESS, process);

    Set<SchedulableEntityInstance> result =
            FeedHelper.getConsumerInstances(feed, getDate("2012-02-28 10:15 UTC"), cluster);

    Set<SchedulableEntityInstance> expected = new HashSet<>();
    for (String instanceTime : new String[]{"2012-02-28 10:20 UTC", "2012-02-28 10:30 UTC"}) {
        SchedulableEntityInstance instance = new SchedulableEntityInstance(process.getName(),
                cluster.getName(), getDate(instanceTime), EntityType.PROCESS);
        instance.setTags(SchedulableEntityInstance.INPUT);
        expected.add(instance);
    }
    Assert.assertEquals(result, expected);
}
@Test
public void testGetPolicies() throws Exception {
    // A feed parsed from FEED3_XML carries exactly one retention policy on testCluster.
    FeedEntityParser parser = (FeedEntityParser) EntityParserFactory.getParser(EntityType.FEED);
    Feed feed = parser.parse(getClass().getResourceAsStream(FEED3_XML));

    List<String> policies = FeedHelper.getPolicies(feed, "testCluster");
    Assert.assertEquals(policies.size(), 1);
    Assert.assertEquals(policies.get(0), "AgeBasedDelete");
}
@Test
public void testFeedWithNoDependencies() throws Exception {
    // With no process referencing the feed, there are neither consumers nor a producer.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");

    Set<SchedulableEntityInstance> consumers =
            FeedHelper.getConsumerInstances(feed, getDate("2016-02-28 09:00 UTC"), cluster);
    Assert.assertTrue(consumers.isEmpty());

    SchedulableEntityInstance producer =
            FeedHelper.getProducerInstance(feed, getDate("2012-02-28 10:45 UTC"), cluster);
    Assert.assertNull(producer);
}
@Test
public void testEvaluateExpression() throws Exception {
    // Cluster expressions resolve ${cluster.colo}, ${cluster.name} and arbitrary
    // cluster properties; plain strings pass through unchanged.
    Cluster cluster = new Cluster();
    cluster.setName("name");
    cluster.setColo("colo");
    cluster.setProperties(new Properties());
    Property property = new Property();
    property.setName("pname");
    property.setValue("pvalue");
    cluster.getProperties().getProperties().add(property);

    Assert.assertEquals(FeedHelper.evaluateClusterExp(cluster, "${cluster.colo}/*/US"), "colo/*/US");
    Assert.assertEquals(FeedHelper.evaluateClusterExp(cluster, "${cluster.name}/*/${cluster.pname}"),
            "name/*/pvalue");
    Assert.assertEquals(FeedHelper.evaluateClusterExp(cluster, "IN"), "IN");
}
@DataProvider(name = "fsPathsforDate")
public Object[][] createPathsForGetDate() {
    // Rows: {path template, concrete path, timezone the path is written in,
    // expected instance time rendered in UTC — null when the path must not match}.
    // Reuses the class-level UTC constant instead of re-creating it, and uses
    // region-based zone IDs throughout: three-letter abbreviations such as "IST"
    // are deprecated and ambiguous per the java.util.TimeZone javadoc.
    // "Asia/Kolkata" (+05:30) is the zone the JDK resolves "IST" to, so the
    // expected value "2015-10-31T22:45Z" (= 2015-11-01 04:15 +05:30) is unchanged.
    final TimeZone pacificTime = TimeZone.getTimeZone("America/Los_Angeles");
    final TimeZone ist = TimeZone.getTimeZone("Asia/Kolkata");
    return new Object[][] {
        {"/data/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}", "/data/2015/01/01/00/30", UTC, "2015-01-01T00:30Z"},
        {"/data/${YEAR}-${MONTH}-${DAY}-${HOUR}-${MINUTE}", "/data/2015-01-01-01-00", UTC, "2015-01-01T01:00Z"},
        {"/data/${YEAR}/${MONTH}/${DAY}", "/data/2015/01/01", UTC, "2015-01-01T00:00Z"},
        {"/data/${YEAR}/${MONTH}/${DAY}/data", "/data/2015/01/01/data", UTC, "2015-01-01T00:00Z"},
        // Paths that do not follow the template must not yield a date.
        {"/data/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}", "/data/2015-01-01/00/30", UTC, null},
        {"/data/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}/data", "/data/2015-01-01/00/30", UTC, null},
        {"/d/${YEAR}/${MONTH}/${DAY}/${HOUR}/data", "/d/2015/05/25/00/data/{p1}/p2", UTC, "2015-05-25T00:00Z"},
        {"/data/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}/data", "/data/2015/05/25/00/00/{p1}/p2", UTC, null},
        {"/d/${YEAR}/${MONTH}/M", "/d/2015/11/M", UTC, "2015-11-01T00:00Z"},
        {"/d/${YEAR}/${MONTH}/${DAY}/M", "/d/2015/11/02/M", UTC, "2015-11-02T00:00Z"},
        {"/d/${YEAR}/${MONTH}/${DAY}/${HOUR}/M", "/d/2015/11/01/04/M", UTC, "2015-11-01T04:00Z"},
        {"/d/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}/M", "/d/2015/11/01/04/15/M", UTC, "2015-11-01T04:15Z"},
        // Non-UTC zones: the expected value is the same wall-clock time converted to UTC.
        {"/d/${YEAR}/${MONTH}/M", "/d/2015/11/M", pacificTime, "2015-11-01T07:00Z"},
        {"/d/${YEAR}/${MONTH}/${DAY}/M", "/d/2015/11/02/M", pacificTime, "2015-11-02T08:00Z"},
        {"/d/${YEAR}/${MONTH}/${DAY}/${HOUR}/M", "/d/2015/11/01/04/M", pacificTime, "2015-11-01T12:00Z"},
        {"/d/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}/M", "/d/2015/11/01/04/15/M", ist, "2015-10-31T22:45Z"},
    };
}
@Test(dataProvider = "fsPathsforDate")
public void testGetDateFromPath(String template, String path, TimeZone tz, String expectedDate) throws Exception {
    // Extract the instance date from a concrete path and compare its UTC rendering.
    final Date parsed = FeedHelper.getDate(template, new Path(path), tz);
    Assert.assertEquals(SchemaHelper.formatDateUTC(parsed), expectedDate);
}
@Test
public void testGetLocations() {
    // Feed-level locations are returned for a feed cluster that defines none of its
    // own; lookup by type picks the DATA location.
    Cluster cluster = new Cluster();
    cluster.setName("name");

    Location metaLocation = new Location();
    metaLocation.setType(LocationType.META);
    Location dataLocation = new Location();
    dataLocation.setType(LocationType.DATA);
    Locations locations = new Locations();
    locations.getLocations().add(metaLocation);
    locations.getLocations().add(dataLocation);

    Feed feed = new Feed();
    feed.setLocations(locations);
    org.apache.falcon.entity.v0.feed.Cluster feedCluster = new org.apache.falcon.entity.v0.feed.Cluster();
    feedCluster.setName("name");
    feed.setClusters(new Clusters());
    feed.getClusters().getClusters().add(feedCluster);

    Assert.assertEquals(FeedHelper.getLocations(feedCluster, feed), locations.getLocations());
    Assert.assertEquals(FeedHelper.getLocation(feed, cluster, LocationType.DATA), dataLocation);
}
@Test
public void testGetProducerProcessWithOffset() throws FalconException, ParseException {
    // Feed instances sit on 5-minute marks while the producer runs on :37/:47 marks;
    // the producer instance for 10:35 is the 10:37 process run.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Assert.assertNull(FeedHelper.getProducerProcess(feed));

    Process process = prepareProcess(cluster, "minutes(10)", "2012-02-28 10:37 UTC", "2016-02-28 10:37 UTC");
    Output output = new Output();
    output.setName("outputFeed");
    output.setFeed(feed.getName());
    output.setInstance("now(0,0)");
    Outputs processOutputs = new Outputs();
    processOutputs.getOutputs().add(output);
    process.setOutputs(processOutputs);
    store.publish(EntityType.PROCESS, process);

    Assert.assertEquals(FeedHelper.getProducerProcess(feed).getName(), process.getName());
    SchedulableEntityInstance result =
            FeedHelper.getProducerInstance(feed, getDate("2013-02-28 10:35 UTC"), cluster);
    SchedulableEntityInstance expected = new SchedulableEntityInstance(process.getName(), cluster.getName(),
            getDate("2013-02-28 10:37 UTC"), EntityType.PROCESS);
    expected.setTags(SchedulableEntityInstance.OUTPUT);
    Assert.assertEquals(result, expected);
}
@Test
public void testGetProducerProcessForNow() throws FalconException, ParseException {
    // Daily feed and daily producer with now(0,0): producer instance time equals
    // the feed instance time.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "days(1)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Assert.assertNull(FeedHelper.getProducerProcess(feed));

    // create it's producer process submit it, test it's ProducerProcess
    Process process = prepareProcess(cluster, "days(1)", "2012-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Output output = new Output();
    output.setName("outputFeed");
    output.setFeed(feed.getName());
    output.setInstance("now(0,0)");
    Outputs processOutputs = new Outputs();
    processOutputs.getOutputs().add(output);
    process.setOutputs(processOutputs);
    store.publish(EntityType.PROCESS, process);

    Assert.assertEquals(FeedHelper.getProducerProcess(feed).getName(), process.getName());
    SchedulableEntityInstance result =
            FeedHelper.getProducerInstance(feed, getDate("2013-02-28 10:00 UTC"), cluster);
    SchedulableEntityInstance expected = new SchedulableEntityInstance(process.getName(), cluster.getName(),
            getDate("2013-02-28 10:00 UTC"), EntityType.PROCESS);
    expected.setTags(SchedulableEntityInstance.OUTPUT);
    Assert.assertEquals(result, expected);
}
@Test
public void testGetProducerWithNowNegativeOffset() throws FalconException, ParseException {
    // Output instance now(-4,0): the process run at 02-28 writes the 02-27 feed
    // instance, so the producer of 02-27 is the 02-28 process instance.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "days(1)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Assert.assertNull(FeedHelper.getProducerProcess(feed));

    // create it's producer process submit it, test it's ProducerProcess
    Process process = prepareProcess(cluster, "days(1)", "2012-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Output output = new Output();
    output.setName("outputFeed");
    output.setFeed(feed.getName());
    output.setInstance("now(-4,0)");
    Outputs processOutputs = new Outputs();
    processOutputs.getOutputs().add(output);
    process.setOutputs(processOutputs);
    store.publish(EntityType.PROCESS, process);

    Assert.assertEquals(FeedHelper.getProducerProcess(feed).getName(), process.getName());
    SchedulableEntityInstance result =
            FeedHelper.getProducerInstance(feed, getDate("2013-02-27 10:00 UTC"), cluster);
    SchedulableEntityInstance expected = new SchedulableEntityInstance(process.getName(), cluster.getName(),
            getDate("2013-02-28 10:00 UTC"), EntityType.PROCESS);
    expected.setTags(SchedulableEntityInstance.OUTPUT);
    Assert.assertEquals(result, expected);
}
@Test
public void testGetProducerWithNowPositiveOffset() throws FalconException, ParseException {
    // Output instance now(4,0): a positive hour offset within the same day still
    // maps the feed instance back to the same-day process run.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "days(1)", "2011-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Assert.assertNull(FeedHelper.getProducerProcess(feed));

    // create it's producer process submit it, test it's ProducerProcess
    Process process = prepareProcess(cluster, "days(1)", "2012-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Output output = new Output();
    output.setName("outputFeed");
    output.setFeed(feed.getName());
    output.setInstance("now(4,0)");
    Outputs processOutputs = new Outputs();
    processOutputs.getOutputs().add(output);
    process.setOutputs(processOutputs);
    store.publish(EntityType.PROCESS, process);

    Assert.assertEquals(FeedHelper.getProducerProcess(feed).getName(), process.getName());
    SchedulableEntityInstance result =
            FeedHelper.getProducerInstance(feed, getDate("2013-02-28 10:00 UTC"), cluster);
    SchedulableEntityInstance expected = new SchedulableEntityInstance(process.getName(), cluster.getName(),
            getDate("2013-02-28 10:00 UTC"), EntityType.PROCESS);
    expected.setTags(SchedulableEntityInstance.OUTPUT);
    Assert.assertEquals(result, expected);
}
@Test
public void testGetProducerProcessInstance() throws FalconException, ParseException {
    // Output instance today(0,0): the midnight feed instance is produced by the
    // 10:00 process run of the same day.
    //create a feed, submit it, test that ProducerProcess is null
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "days(1)", "2011-02-28 00:00 UTC", "2016-02-28 10:00 UTC");

    // create it's producer process submit it, test it's ProducerProcess
    Process process = prepareProcess(cluster, "days(1)", "2012-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Output output = new Output();
    output.setName("outputFeed");
    output.setFeed(feed.getName());
    output.setInstance("today(0,0)");
    Outputs processOutputs = new Outputs();
    processOutputs.getOutputs().add(output);
    process.setOutputs(processOutputs);
    store.publish(EntityType.PROCESS, process);

    Assert.assertEquals(FeedHelper.getProducerProcess(feed).getName(), process.getName());
    SchedulableEntityInstance result =
            FeedHelper.getProducerInstance(feed, getDate("2013-02-28 00:00 UTC"), cluster);
    SchedulableEntityInstance expected = new SchedulableEntityInstance(process.getName(), cluster.getName(),
            getDate("2013-02-28 10:00 UTC"), EntityType.PROCESS);
    expected.setTags(SchedulableEntityInstance.OUTPUT);
    Assert.assertEquals(result, expected);
}
@Test
public void testGetConsumerProcesses() throws FalconException, ParseException {
    //create a feed, submit it, test that ConsumerProcesses is blank list
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "days(1)", "2012-02-28 10:00 UTC", "2016-02-28 10:00 UTC");

    //create a consumer Process and submit it, assert that this is returned in ConsumerProcesses
    Process process = prepareProcess(cluster, "days(1)", "2012-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Input input = new Input();
    // NOTE(review): the input is named "outputFeed" unlike the other tests' "inputFeed";
    // the name appears arbitrary here, so it is preserved as-is.
    input.setName("outputFeed");
    input.setFeed(feed.getName());
    input.setStart("today(0,0)");
    input.setEnd("today(0,0)");
    Inputs processInputs = new Inputs();
    processInputs.getInputs().add(input);
    process.setInputs(processInputs);
    store.publish(EntityType.PROCESS, process);

    Set<Process> consumers = FeedHelper.getConsumerProcesses(feed);
    Assert.assertEquals(consumers.size(), 1);
    Assert.assertTrue(consumers.contains(process));
}
@Test
public void testGetConsumerProcessInstances() throws Exception {
    // Hourly feed, daily consumer reading now(-4,30)..now(4,30): the 09:00 feed
    // instance falls inside exactly one process instance's input window.
    //create a feed, submit it, test that ConsumerProcesses is blank list
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "hours(1)", "2012-02-28 00:00 UTC", "2016-02-28 00:00 UTC");

    //create a consumer Process and submit it, assert that this is returned in ConsumerProcesses
    Process process = prepareProcess(cluster, "days(1)", "2012-02-28 10:00 UTC", "2016-02-28 10:00 UTC");
    Input input = new Input();
    input.setName("inputFeed");
    input.setFeed(feed.getName());
    input.setStart("now(-4, 30)");
    input.setEnd("now(4, 30)");
    Inputs processInputs = new Inputs();
    processInputs.getInputs().add(input);
    process.setInputs(processInputs);
    store.publish(EntityType.PROCESS, process);

    Set<SchedulableEntityInstance> result =
            FeedHelper.getConsumerInstances(feed, getDate("2012-02-28 09:00 UTC"), cluster);
    Assert.assertEquals(result.size(), 1);

    SchedulableEntityInstance consumer = new SchedulableEntityInstance(process.getName(), cluster.getName(),
            getDate("2012-02-28 10:00 UTC"), EntityType.PROCESS);
    consumer.setTags(SchedulableEntityInstance.INPUT);
    Set<SchedulableEntityInstance> expected = new HashSet<>();
    expected.add(consumer);
    Assert.assertEquals(result, expected);
}
@Test
public void testGetConsumerProcessInstancesWithNonUnitFrequency() throws Exception {
    // 5-minute feed consumed by a 10-minute process on :37/:47/:57 marks; the
    // 09:40 feed instance is read by the 09:47 and 09:57 process runs.
    //create a feed, submit it, test that ConsumerProcesses is blank list
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2012-02-28 00:00 UTC", "2016-02-28 00:00 UTC");

    //create a consumer Process and submit it, assert that this is returned in ConsumerProcesses
    Process process = prepareProcess(cluster, "minutes(10)", "2012-02-28 09:37 UTC", "2016-02-28 10:00 UTC");
    Input input = new Input();
    input.setName("inputFeed");
    input.setFeed(feed.getName());
    input.setStart("now(0, -20)");
    input.setEnd("now(0,0)");
    Inputs processInputs = new Inputs();
    processInputs.getInputs().add(input);
    process.setInputs(processInputs);
    store.publish(EntityType.PROCESS, process);

    Set<SchedulableEntityInstance> result =
            FeedHelper.getConsumerInstances(feed, getDate("2012-02-28 09:40 UTC"), cluster);

    Set<SchedulableEntityInstance> expected = new HashSet<>();
    for (String instanceTime : new String[]{"2012-02-28 09:47 UTC", "2012-02-28 09:57 UTC"}) {
        SchedulableEntityInstance instance = new SchedulableEntityInstance(process.getName(),
                cluster.getName(), getDate(instanceTime), EntityType.PROCESS);
        instance.setTags(SchedulableEntityInstance.INPUT);
        expected.add(instance);
    }
    Assert.assertEquals(result, expected);
}
@Test
public void testGetConsumersOutOfValidityRange() throws Exception {
    // A 2010 feed instance predates the 2012 process validity start: no consumers.
    //create a feed, submit it, test that ConsumerProcesses is blank list
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2010-02-28 00:00 UTC", "2016-02-28 00:00 UTC");

    //create a consumer Process and submit it, assert that this is returned in ConsumerProcesses
    Process process = prepareProcess(cluster, "minutes(10)", "2012-02-28 09:37 UTC", "2016-02-28 10:00 UTC");
    Input input = new Input();
    input.setName("inputFeed");
    input.setFeed(feed.getName());
    input.setStart("now(0, -20)");
    input.setEnd("now(0,0)");
    Inputs processInputs = new Inputs();
    processInputs.getInputs().add(input);
    process.setInputs(processInputs);
    store.publish(EntityType.PROCESS, process);

    Set<SchedulableEntityInstance> result =
            FeedHelper.getConsumerInstances(feed, getDate("2010-02-28 09:40 UTC"), cluster);
    Assert.assertEquals(result.size(), 0);
}
@Test
public void testGetConsumersLargeOffsetShortValidity() throws Exception {
    // Input window today(-2,0)..now(0,0) is much wider than the process's
    // 10-minute validity, so only the single 09:37 run consumes the instance.
    //create a feed, submit it, test that ConsumerProcesses is blank list
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "minutes(5)", "2010-02-28 00:00 UTC", "2016-02-28 00:00 UTC");

    //create a consumer Process and submit it, assert that this is returned in ConsumerProcesses
    Process process = prepareProcess(cluster, "minutes(10)", "2012-02-28 09:37 UTC", "2012-02-28 09:47 UTC");
    Input input = new Input();
    input.setName("inputFeed");
    input.setFeed(feed.getName());
    input.setStart("today(-2, 0)");
    input.setEnd("now(0,0)");
    Inputs processInputs = new Inputs();
    processInputs.getInputs().add(input);
    process.setInputs(processInputs);
    store.publish(EntityType.PROCESS, process);

    Set<SchedulableEntityInstance> result =
            FeedHelper.getConsumerInstances(feed, getDate("2012-02-28 09:35 UTC"), cluster);

    SchedulableEntityInstance consumer = new SchedulableEntityInstance(process.getName(), cluster.getName(),
            getDate("2012-02-28 09:37 UTC"), EntityType.PROCESS);
    consumer.setTags(SchedulableEntityInstance.INPUT);
    Set<SchedulableEntityInstance> expected = new HashSet<>();
    expected.add(consumer);
    Assert.assertEquals(result, expected);
}
@Test
public void testGetMultipleConsumerInstances() throws Exception {
    // Hourly process reading now(-4,30)..now(4,30): nine process runs (05:00–13:00)
    // have the 09:00 feed instance inside their input window.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "hours(1)", "2012-02-27 00:00 UTC", "2016-02-28 00:00 UTC");
    Process process = prepareProcess(cluster, "hours(1)", "2012-02-27 10:00 UTC", "2016-02-28 10:00 UTC");

    Input input = new Input();
    input.setName("inputFeed");
    input.setFeed(feed.getName());
    input.setStart("now(-4, 30)");
    input.setEnd("now(4, 30)");
    Inputs processInputs = new Inputs();
    processInputs.getInputs().add(input);
    process.setInputs(processInputs);
    store.publish(EntityType.PROCESS, process);

    Set<SchedulableEntityInstance> result =
            FeedHelper.getConsumerInstances(feed, getDate("2012-02-28 09:00 UTC"), cluster);
    Assert.assertEquals(result.size(), 9);

    Set<SchedulableEntityInstance> expected = new HashSet<>();
    for (String instanceTime : new String[]{
            "2012-02-28 05:00 UTC", "2012-02-28 06:00 UTC", "2012-02-28 07:00 UTC",
            "2012-02-28 08:00 UTC", "2012-02-28 09:00 UTC", "2012-02-28 10:00 UTC",
            "2012-02-28 11:00 UTC", "2012-02-28 12:00 UTC", "2012-02-28 13:00 UTC"}) {
        SchedulableEntityInstance instance = new SchedulableEntityInstance(process.getName(),
                cluster.getName(), getDate(instanceTime), EntityType.PROCESS);
        instance.setTags(SchedulableEntityInstance.INPUT);
        expected.add(instance);
    }
    Assert.assertEquals(result, expected);
}
@Test
public void testGetConsumerWithVariableEnd() throws Exception {
    // Input range today(0,0)..now(0,0): every hourly process run of 2012-02-28
    // consumes the midnight feed instance.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "hours(1)", "2012-02-27 00:00 UTC", "2016-02-28 00:00 UTC");

    //create a consumer Process and submit it, assert that this is returned in ConsumerProcesses
    Process process = prepareProcess(cluster, "hours(1)", "2012-02-27 10:00 UTC", "2016-02-28 10:00 UTC");
    Input input = new Input();
    input.setName("inputFeed");
    input.setFeed(feed.getName());
    input.setStart("today(0, 0)");
    input.setEnd("now(0, 0)");
    Inputs processInputs = new Inputs();
    processInputs.getInputs().add(input);
    process.setInputs(processInputs);
    store.publish(EntityType.PROCESS, process);

    Set<SchedulableEntityInstance> result =
            FeedHelper.getConsumerInstances(feed, getDate("2012-02-28 00:00 UTC"), cluster);

    Set<SchedulableEntityInstance> expected = new HashSet<>();
    for (String instanceTime : new String[]{
            "2012-02-28 11:00 UTC", "2012-02-28 16:00 UTC", "2012-02-28 18:00 UTC",
            "2012-02-28 20:00 UTC", "2012-02-28 13:00 UTC", "2012-02-28 03:00 UTC",
            "2012-02-28 04:00 UTC", "2012-02-28 06:00 UTC", "2012-02-28 05:00 UTC",
            "2012-02-28 17:00 UTC", "2012-02-28 00:00 UTC", "2012-02-28 23:00 UTC",
            "2012-02-28 21:00 UTC", "2012-02-28 15:00 UTC", "2012-02-28 22:00 UTC",
            "2012-02-28 14:00 UTC", "2012-02-28 08:00 UTC", "2012-02-28 12:00 UTC",
            "2012-02-28 02:00 UTC", "2012-02-28 01:00 UTC", "2012-02-28 19:00 UTC",
            "2012-02-28 10:00 UTC", "2012-02-28 09:00 UTC", "2012-02-28 07:00 UTC"}) {
        SchedulableEntityInstance instance = new SchedulableEntityInstance(process.getName(),
                cluster.getName(), getDate(instanceTime), EntityType.PROCESS);
        instance.setTags(SchedulableEntityInstance.INPUT);
        expected.add(instance);
    }
    Assert.assertEquals(result, expected);
}
@Test
public void testGetConsumerWithVariableStart() throws Exception {
    // Input range now(0,0)..today(24,0): the 2012-03-28 00:00 feed instance is
    // consumed by every hourly run of 2012-03-27 plus the 03-28 midnight run.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "hours(1)", "2012-02-27 00:00 UTC", "2016-02-28 00:00 UTC");

    //create a consumer Process and submit it, assert that this is returned in ConsumerProcesses
    Process process = prepareProcess(cluster, "hours(1)", "2012-02-27 10:00 UTC", "2016-02-28 10:00 UTC");
    Input input = new Input();
    input.setName("inputFeed");
    input.setFeed(feed.getName());
    input.setStart("now(0, 0)");
    input.setEnd("today(24, 0)");
    Inputs processInputs = new Inputs();
    processInputs.getInputs().add(input);
    process.setInputs(processInputs);
    store.publish(EntityType.PROCESS, process);

    Set<SchedulableEntityInstance> result =
            FeedHelper.getConsumerInstances(feed, getDate("2012-03-28 00:00 UTC"), cluster);

    Set<SchedulableEntityInstance> expected = new HashSet<>();
    for (String instanceTime : new String[]{
            "2012-03-27 16:00 UTC", "2012-03-27 01:00 UTC", "2012-03-27 10:00 UTC",
            "2012-03-27 03:00 UTC", "2012-03-27 08:00 UTC", "2012-03-27 07:00 UTC",
            "2012-03-27 19:00 UTC", "2012-03-27 22:00 UTC", "2012-03-27 12:00 UTC",
            "2012-03-27 20:00 UTC", "2012-03-27 09:00 UTC", "2012-03-27 04:00 UTC",
            "2012-03-27 14:00 UTC", "2012-03-27 05:00 UTC", "2012-03-27 23:00 UTC",
            "2012-03-27 17:00 UTC", "2012-03-27 13:00 UTC", "2012-03-27 18:00 UTC",
            "2012-03-27 15:00 UTC", "2012-03-28 00:00 UTC", "2012-03-27 02:00 UTC",
            "2012-03-27 11:00 UTC", "2012-03-27 21:00 UTC", "2012-03-27 00:00 UTC",
            "2012-03-27 06:00 UTC"}) {
        SchedulableEntityInstance instance = new SchedulableEntityInstance(process.getName(),
                cluster.getName(), getDate(instanceTime), EntityType.PROCESS);
        instance.setTags(SchedulableEntityInstance.INPUT);
        expected.add(instance);
    }
    Assert.assertEquals(result, expected);
}
@Test
public void testGetConsumerWithLatest() throws Exception {
    // Input range today(0,0)..latest(0): every hourly run of 2012-02-28 consumes
    // the midnight feed instance.
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "hours(1)", "2012-02-27 00:00 UTC", "2016-02-28 00:00 UTC");
    Process process = prepareProcess(cluster, "hours(1)", "2012-02-27 10:00 UTC", "2016-02-28 10:00 UTC");

    Input input = new Input();
    input.setName("inputFeed");
    input.setFeed(feed.getName());
    input.setStart("today(0, 0)");
    input.setEnd("latest(0)");
    Inputs processInputs = new Inputs();
    processInputs.getInputs().add(input);
    process.setInputs(processInputs);
    store.publish(EntityType.PROCESS, process);

    Set<SchedulableEntityInstance> result =
            FeedHelper.getConsumerInstances(feed, getDate("2012-02-28 00:00 UTC"), cluster);

    Set<SchedulableEntityInstance> expected = new HashSet<>();
    for (String instanceTime : new String[]{
            "2012-02-28 23:00 UTC", "2012-02-28 04:00 UTC", "2012-02-28 10:00 UTC",
            "2012-02-28 07:00 UTC", "2012-02-28 17:00 UTC", "2012-02-28 13:00 UTC",
            "2012-02-28 05:00 UTC", "2012-02-28 22:00 UTC", "2012-02-28 03:00 UTC",
            "2012-02-28 21:00 UTC", "2012-02-28 11:00 UTC", "2012-02-28 20:00 UTC",
            "2012-02-28 06:00 UTC", "2012-02-28 01:00 UTC", "2012-02-28 14:00 UTC",
            "2012-02-28 00:00 UTC", "2012-02-28 18:00 UTC", "2012-02-28 12:00 UTC",
            "2012-02-28 16:00 UTC", "2012-02-28 09:00 UTC", "2012-02-28 15:00 UTC",
            "2012-02-28 19:00 UTC", "2012-02-28 08:00 UTC", "2012-02-28 02:00 UTC"}) {
        SchedulableEntityInstance instance = new SchedulableEntityInstance(process.getName(),
                cluster.getName(), getDate(instanceTime), EntityType.PROCESS);
        instance.setTags(SchedulableEntityInstance.INPUT);
        expected.add(instance);
    }
    Assert.assertEquals(result, expected);
}
@Test
public void testIsLifeCycleEnabled() throws Exception {
    Feed feed = new Feed();
    org.apache.falcon.entity.v0.feed.Cluster feedCluster = new org.apache.falcon.entity.v0.feed.Cluster();
    feedCluster.setName("cluster1");
    Clusters feedClusters = new Clusters();
    feedClusters.getClusters().add(feedCluster);
    feed.setClusters(feedClusters);

    // No lifecycle defined anywhere -> disabled.
    Assert.assertFalse(FeedHelper.isLifecycleEnabled(feed, feedCluster.getName()));

    // Lifecycle defined only at the global (feed) level -> enabled.
    RetentionStage globalRetention = new RetentionStage();
    globalRetention.setFrequency(new Frequency("hours(2)"));
    Lifecycle globalLifecycle = new Lifecycle();
    globalLifecycle.setRetentionStage(globalRetention);
    feed.setLifecycle(globalLifecycle);
    Assert.assertTrue(FeedHelper.isLifecycleEnabled(feed, feedCluster.getName()));

    // Lifecycle defined at both global and cluster level -> enabled.
    RetentionStage clusterRetention = new RetentionStage();
    clusterRetention.setFrequency(new Frequency("hours(4)"));
    Lifecycle clusterLifecycle = new Lifecycle();
    clusterLifecycle.setRetentionStage(clusterRetention);
    feed.getClusters().getClusters().get(0).setLifecycle(clusterLifecycle);
    Assert.assertTrue(FeedHelper.isLifecycleEnabled(feed, feedCluster.getName()));

    // Lifecycle defined only at the cluster level -> enabled.
    feed.setLifecycle(null);
    Assert.assertTrue(FeedHelper.isLifecycleEnabled(feed, feedCluster.getName()));
}
// Verifies how FeedHelper resolves the retention stage and its frequency:
// the cluster-level lifecycle takes precedence over the global (feed-level)
// one, and the feed's own frequency is the fallback when no retention
// frequency is configured at all. The mutations below are order-dependent;
// each assertion checks the state built up so far.
@Test
public void testGetRetentionStage() throws Exception {
Feed feed = new Feed();
feed.setFrequency(new Frequency("days(1)"));
// retention stage present but its frequency is not defined:
// the helper falls back to the feed frequency (days(1)).
Lifecycle globalLifecycle = new Lifecycle();
RetentionStage globalRetentionStage = new RetentionStage();
globalLifecycle.setRetentionStage(globalRetentionStage);
feed.setLifecycle(globalLifecycle);
Clusters clusters = new Clusters();
org.apache.falcon.entity.v0.feed.Cluster cluster = new org.apache.falcon.entity.v0.feed.Cluster();
cluster.setName("cluster1");
clusters.getClusters().add(cluster);
feed.setClusters(clusters);
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
new Frequency("days(1)"));
// lifecycle (with frequency) defined only at global level:
// the global retention frequency is used as-is.
globalRetentionStage.setFrequency(new Frequency("hours(2)"));
globalLifecycle.setRetentionStage(globalRetentionStage);
feed.setLifecycle(globalLifecycle);
Assert.assertNotNull(FeedHelper.getRetentionStage(feed, cluster.getName()));
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
feed.getLifecycle().getRetentionStage().getFrequency());
// lifecycle defined at both global and cluster level:
// the cluster-level retention frequency wins.
Lifecycle clusterLifecycle = new Lifecycle();
RetentionStage clusterRetentionStage = new RetentionStage();
clusterRetentionStage.setFrequency(new Frequency("hours(4)"));
clusterLifecycle.setRetentionStage(clusterRetentionStage);
feed.getClusters().getClusters().get(0).setLifecycle(clusterLifecycle);
Assert.assertNotNull(FeedHelper.getRetentionStage(feed, cluster.getName()));
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
cluster.getLifecycle().getRetentionStage().getFrequency());
// lifecycle at both levels - retention stage only at cluster level.
feed.getLifecycle().setRetentionStage(null);
Assert.assertNotNull(FeedHelper.getRetentionStage(feed, cluster.getName()));
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
cluster.getLifecycle().getRetentionStage().getFrequency());
// lifecycle at both levels - retention stage only at global level.
feed.getLifecycle().setRetentionStage(globalRetentionStage);
feed.getClusters().getClusters().get(0).getLifecycle().setRetentionStage(null);
Assert.assertNotNull(FeedHelper.getRetentionStage(feed, cluster.getName()));
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
feed.getLifecycle().getRetentionStage().getFrequency());
// lifecycle defined only at cluster level.
feed.setLifecycle(null);
feed.getClusters().getClusters().get(0).getLifecycle().setRetentionStage(clusterRetentionStage);
Assert.assertNotNull(FeedHelper.getRetentionStage(feed, cluster.getName()));
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
cluster.getLifecycle().getRetentionStage().getFrequency());
}
// Verifies the default retention frequency computed by
// FeedHelper.getLifecycleRetentionFrequency when no explicit retention
// frequency is configured: sub-six-hour feeds default to hours(6), while
// feeds with frequency >= 6 hours retain at their own frequency.
// NOTE(review): the exact floor of hours(6) is asserted below — these
// expectations encode the helper's documented minimums.
@Test
public void testGetRetentionFrequency() throws Exception {
Feed feed = new Feed();
feed.setFrequency(new Frequency("days(10)"));
// no retention stage frequency defined - test both daily and monthly feeds
Lifecycle globalLifecycle = new Lifecycle();
RetentionStage globalRetentionStage = new RetentionStage();
globalLifecycle.setRetentionStage(globalRetentionStage);
feed.setLifecycle(globalLifecycle);
Clusters clusters = new Clusters();
org.apache.falcon.entity.v0.feed.Cluster cluster = new org.apache.falcon.entity.v0.feed.Cluster();
cluster.setName("cluster1");
clusters.getClusters().add(cluster);
feed.setClusters(clusters);
// days(10) >= hours(6), so the feed frequency itself is used.
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
new Frequency("days(10)"));
// hours(1) < hours(6): clamped up to the hours(6) minimum.
feed.setFrequency(new Frequency("hours(1)"));
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
new Frequency("hours(6)"));
// minutes(10) < hours(6): also clamped up to hours(6).
feed.setFrequency(new Frequency("minutes(10)"));
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
new Frequency("hours(6)"));
// hours(7) >= hours(6): the feed frequency is kept.
feed.setFrequency(new Frequency("hours(7)"));
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
new Frequency("hours(7)"));
// days(2) >= hours(6): the feed frequency is kept.
feed.setFrequency(new Frequency("days(2)"));
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
new Frequency("days(2)"));
// lifecycle at both levels - retention frequency only at global level;
// cluster stage has no frequency, so global hours(2) applies but is still
// clamped to the hours(6) minimum.
feed.setFrequency(new Frequency("hours(1)"));
globalRetentionStage.setFrequency(new Frequency("hours(2)"));
globalLifecycle.setRetentionStage(globalRetentionStage);
feed.setLifecycle(globalLifecycle);
Lifecycle clusterLifecycle = new Lifecycle();
RetentionStage clusterRetentionStage = new RetentionStage();
clusterLifecycle.setRetentionStage(clusterRetentionStage);
feed.getClusters().getClusters().get(0).setLifecycle(clusterLifecycle);
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
new Frequency("hours(6)"));
// lifecycle at both levels - retention frequency only at cluster level:
// the explicit cluster-level hours(4) is honored as-is.
feed.getLifecycle().getRetentionStage().setFrequency(null);
clusterRetentionStage.setFrequency(new Frequency("hours(4)"));
Assert.assertEquals(FeedHelper.getLifecycleRetentionFrequency(feed, cluster.getName()),
new Frequency("hours(4)"));
}
@Test
public void testFeedImportSnapshot() throws Exception {
    Cluster cluster = publishCluster();
    Feed feed = importFeedSnapshot(cluster, "hours(1)", "2012-02-07 00:00 UTC", "2020-02-25 00:00 UTC");
    org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());

    // The feed's cluster definition and its validity must survive the round trip.
    Assert.assertNotNull(feed.getClusters().getClusters());
    Assert.assertNotNull(feed.getClusters().getClusters().get(0));
    Assert.assertNotNull(feed.getClusters().getClusters().get(0).getValidity());
    Assert.assertNotNull(feed.getClusters().getClusters().get(0).getValidity().getStart());
    Assert.assertNotNull(feedCluster.getValidity().getStart());
    Assert.assertEquals(getDate("2012-02-07 00:00 UTC"), feedCluster.getValidity().getStart());

    // Import is enabled with SNAPSHOT merge, and the initial import instance
    // coincides with the validity start.
    Assert.assertTrue(FeedHelper.isImportEnabled(feedCluster));
    Assert.assertEquals(MergeType.SNAPSHOT, FeedHelper.getImportMergeType(feedCluster));
    Date initialInstance = FeedHelper.getImportInitalInstance(feedCluster);
    Assert.assertNotNull(initialInstance);
    Assert.assertEquals(initialInstance, feedCluster.getValidity().getStart());
}
@Test
public void testFeedImportFields() throws Exception {
    Cluster cluster = publishCluster();
    Feed feed = importFeedSnapshot(cluster, "hours(1)", "2012-02-07 00:00 UTC", "2020-02-25 00:00 UTC");
    org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
    // getAnImport() declares exactly two *included* fields: "id" and "name".
    // (Removed an unused getImportInitalInstance() call/local that contributed
    // nothing to this test.)
    List<String> fieldList = FeedHelper.getImportFieldList(feedCluster);
    Assert.assertEquals(2, fieldList.size());
    Assert.assertFalse(FeedHelper.isFieldExcludes(feedCluster.getImport().getSource()));
}
@Test
public void testFeedImportAppend() throws Exception {
    Cluster sourceCluster = publishCluster();
    Feed feed = importFeedAppend(sourceCluster, "hours(1)", "2012-02-07 00:00 UTC", "2020-02-25 00:00 UTC");
    org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, sourceCluster.getName());
    // The first import instance should line up with the cluster validity start.
    Date initialInstance = FeedHelper.getImportInitalInstance(feedCluster);
    Assert.assertEquals(initialInstance, feed.getClusters().getClusters().get(0).getValidity().getStart());
}
// Fixed: the @Test annotation was missing, so TestNG silently skipped this
// test — it never ran even though it looks like a test method.
@Test
public void testGetFeedClusterValidity() throws Exception {
    Cluster cluster = publishCluster();
    Feed feed = publishFeed(cluster, "hours(1)", "2012-02-07 00:00 UTC", "2020-02-25 00:00 UTC");
    // The validity window returned for the cluster must match what was published.
    Validity validity = FeedHelper.getClusterValidity(feed, cluster.getName());
    Assert.assertEquals(validity.getStart(), getDate("2012-02-07 00:00 UTC"));
    Assert.assertEquals(validity.getEnd(), getDate("2020-02-25 00:00 UTC"));
}
@Test(expectedExceptions = FalconException.class)
public void testGetClusterValidityInvalidCluster() throws Exception {
    Cluster sourceCluster = publishCluster();
    Feed feed = publishFeed(sourceCluster, "hours(1)", "2012-02-07 00:00 UTC", "2020-02-25 00:00 UTC");
    // Asking for validity on a cluster the feed does not reference must throw.
    FeedHelper.getClusterValidity(feed, "abracadabra");
}
// Builds a feed-side validity window from "yyyy-MM-dd HH:mm Z" strings.
private Validity getFeedValidity(String start, String end) throws ParseException {
    Validity window = new Validity();
    window.setStart(getDate(start));
    window.setEnd(getDate(end));
    return window;
}
// Builds a process-side validity window from "yyyy-MM-dd HH:mm Z" strings.
private org.apache.falcon.entity.v0.process.Validity getProcessValidity(String start, String end) throws
        ParseException {
    org.apache.falcon.entity.v0.process.Validity window =
            new org.apache.falcon.entity.v0.process.Validity();
    window.setStart(getDate(start));
    window.setEnd(getDate(end));
    return window;
}
// Parses the "yyyy-MM-dd HH:mm Z" timestamps used throughout these tests.
// A fresh SimpleDateFormat per call keeps this trivially thread-safe.
private Date getDate(String dateString) throws ParseException {
    return new SimpleDateFormat("yyyy-MM-dd HH:mm Z").parse(dateString);
}
// Registers the minimal cluster entity shared by the fixtures in this class.
private Cluster publishCluster() throws FalconException {
    Cluster sourceCluster = new Cluster();
    sourceCluster.setName("feedCluster");
    sourceCluster.setColo("colo");
    store.publish(EntityType.CLUSTER, sourceCluster);
    return sourceCluster;
}
// Convenience overload: publishes a feed with no import policy attached.
private Feed publishFeed(Cluster cluster, String frequency, String start, String end)
throws FalconException, ParseException {
return publishFeed(cluster, frequency, start, end, null);
}
// Builds a single-cluster SOURCE feed (optionally carrying an import policy)
// and registers it with the configuration store.
private Feed publishFeed(Cluster cluster, String frequency, String start, String end, Import imp)
        throws FalconException, ParseException {
    org.apache.falcon.entity.v0.feed.Cluster feedCluster =
            new org.apache.falcon.entity.v0.feed.Cluster();
    feedCluster.setName(cluster.getName());
    feedCluster.setType(ClusterType.SOURCE);
    feedCluster.setImport(imp);
    feedCluster.setValidity(getFeedValidity(start, end));
    Clusters feedClusters = new Clusters();
    feedClusters.getClusters().add(feedCluster);

    Feed feed = new Feed();
    feed.setName("feed");
    feed.setFrequency(new Frequency(frequency));
    feed.setTimezone(UTC);
    feed.setClusters(feedClusters);
    store.publish(EntityType.FEED, feed);
    return feed;
}
// Builds (but does NOT publish) a single-cluster process; callers attach
// inputs/outputs and publish it themselves.
private Process prepareProcess(Cluster cluster, String frequency, String start, String end) throws ParseException {
    org.apache.falcon.entity.v0.process.Cluster processCluster =
            new org.apache.falcon.entity.v0.process.Cluster();
    processCluster.setName(cluster.getName());
    processCluster.setValidity(getProcessValidity(start, end));
    org.apache.falcon.entity.v0.process.Clusters processClusters =
            new org.apache.falcon.entity.v0.process.Clusters();
    processClusters.getClusters().add(processCluster);

    Process process = new Process();
    process.setName("process");
    process.setTimezone(UTC);
    process.setFrequency(new Frequency(frequency));
    process.setClusters(processClusters);
    return process;
}
// Publishes a feed whose cluster carries a SNAPSHOT-merge import policy.
private Feed importFeedSnapshot(Cluster cluster, String frequency, String start, String end)
        throws FalconException, ParseException {
    return publishFeed(cluster, frequency, start, end, getAnImport(MergeType.SNAPSHOT));
}
// Publishes a feed whose cluster carries an APPEND-merge import policy.
// Fixed: the Import was previously built but never passed to publishFeed,
// so the "append" feed was published with no import policy at all
// (contrast importFeedSnapshot, which does pass its Import through).
private Feed importFeedAppend(Cluster cluster, String frequency, String start, String end)
        throws FalconException, ParseException {
    Import imp = getAnImport(MergeType.APPEND);
    return publishFeed(cluster, frequency, start, end, imp);
}
// Builds an import policy: full extract with the requested merge policy,
// reading the "id" and "name" columns of test-db.test-table, with
// Sqoop-style arguments controlling the parallel import.
private Import getAnImport(MergeType mergeType) {
    Extract extract = new Extract();
    extract.setType(ExtractMethod.FULL);
    extract.setMergepolicy(mergeType);

    // Include exactly the "id" and "name" columns.
    FieldIncludeExclude includedFields = new FieldIncludeExclude();
    includedFields.getFields().add("id");
    includedFields.getFields().add("name");
    FieldsType fieldsType = new FieldsType();
    fieldsType.setIncludes(includedFields);

    Datasource datasource = new Datasource();
    datasource.setName("test-db");
    datasource.setTableName("test-table");
    datasource.setExtract(extract);
    datasource.setFields(fieldsType);

    Argument splitBy = new Argument();
    splitBy.setName("--split_by");
    splitBy.setValue("id");
    Argument numMappers = new Argument();
    numMappers.setName("--num-mappers");
    numMappers.setValue("2");
    Arguments arguments = new Arguments();
    arguments.getArguments().add(splitBy);
    arguments.getArguments().add(numMappers);

    Import feedImport = new Import();
    feedImport.setSource(datasource);
    feedImport.setArguments(arguments);
    return feedImport;
}
}