/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.falcon.metadata;
import com.tinkerpop.blueprints.Direction;
import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Graph;
import com.tinkerpop.blueprints.GraphQuery;
import com.tinkerpop.blueprints.Vertex;
import org.apache.falcon.FalconException;
import org.apache.falcon.cluster.util.EntityBuilderTestUtil;
import org.apache.falcon.entity.Storage;
import org.apache.falcon.entity.store.ConfigurationStore;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.feed.CatalogTable;
import org.apache.falcon.entity.v0.feed.ClusterType;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.Location;
import org.apache.falcon.entity.v0.feed.LocationType;
import org.apache.falcon.entity.v0.feed.Locations;
import org.apache.falcon.entity.v0.process.EngineType;
import org.apache.falcon.entity.v0.process.Input;
import org.apache.falcon.entity.v0.process.Inputs;
import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Outputs;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.hadoop.HadoopClientFactory;
import org.apache.falcon.retention.EvictedInstanceSerDe;
import org.apache.falcon.security.CurrentUser;
import org.apache.falcon.service.Services;
import org.apache.falcon.util.StartupProperties;
import org.apache.falcon.workflow.WorkflowExecutionArgs;
import org.apache.falcon.workflow.WorkflowExecutionContext;
import org.apache.falcon.workflow.WorkflowJobEndNotificationService;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import static org.apache.falcon.workflow.WorkflowExecutionContext.EntityOperations;
/**
* Test for Metadata relationship mapping service.
*/
public class MetadataMappingServiceTest {
public static final String FALCON_USER = "falcon-user";
private static final String LOGS_DIR = "jail://global:00/falcon/staging/feed/logs";
private static final String NOMINAL_TIME = "2014-01-01-01-00";
public static final String CLUSTER_ENTITY_NAME = "primary-cluster";
public static final String BCP_CLUSTER_ENTITY_NAME = "bcp-cluster";
public static final String PROCESS_ENTITY_NAME = "sample-process";
public static final String COLO_NAME = "west-coast";
public static final String GENERATE_WORKFLOW_NAME = "imp-click-join-workflow";
public static final String REPLICATION_WORKFLOW_NAME = "replication-policy-workflow";
private static final String EVICTION_WORKFLOW_NAME = "eviction-policy-workflow";
public static final String WORKFLOW_VERSION = "1.0.9";
public static final String INPUT_FEED_NAMES = "impression-feed#clicks-feed";
public static final String INPUT_INSTANCE_PATHS =
"jail://global:00/falcon/impression-feed/2014/01/01,jail://global:00/falcon/impression-feed/2014/01/02"
+ "#jail://global:00/falcon/clicks-feed/2014-01-01";
public static final String INPUT_INSTANCE_PATHS_NO_DATE =
"jail://global:00/falcon/impression-feed,jail://global:00/falcon/impression-feed"
+ "#jail://global:00/falcon/clicks-feed";
public static final String OUTPUT_FEED_NAMES = "imp-click-join1,imp-click-join2";
public static final String OUTPUT_INSTANCE_PATHS =
"jail://global:00/falcon/imp-click-join1/20140101,jail://global:00/falcon/imp-click-join2/20140101";
private static final String REPLICATED_FEED = "raw-click";
private static final String EVICTED_FEED = "imp-click-join1";
private static final String EVICTED_INSTANCE_PATHS =
"jail://global:00/falcon/imp-click-join1/20140101,jail://global:00/falcon/imp-click-join1/20140102";
public static final String OUTPUT_INSTANCE_PATHS_NO_DATE =
"jail://global:00/falcon/imp-click-join1,jail://global:00/falcon/imp-click-join2";
public static final String COUNTERS = "TIMETAKEN:36956,COPY:30,BYTESCOPIED:1000";
public static final String BROKER = "org.apache.activemq.ActiveMQConnectionFactory";
private ConfigurationStore configStore;
private MetadataMappingService service;
private Cluster clusterEntity;
private Cluster anotherCluster;
private List<Feed> inputFeeds = new ArrayList<>();
private List<Feed> outputFeeds = new ArrayList<>();
private Process processEntity;
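/**
* Configures a BerkeleyDB-JE backed graph store under target/ and initializes
* the metadata mapping service shared by all tests in this class.
*/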
@BeforeClass
public void setUp() throws Exception {
CurrentUser.authenticate(FALCON_USER);
configStore = ConfigurationStore.get();
Services.get().register(new WorkflowJobEndNotificationService());
StartupProperties.get().setProperty("falcon.graph.storage.backend", "berkeleyje");
String graphDBDir = "target/graphdb-" + System.currentTimeMillis();
StartupProperties.get().setProperty("falcon.graph.storage.directory", graphDBDir);
StartupProperties.get().setProperty("falcon.graph.serialize.path", graphDBDir);
StartupProperties.get().setProperty("falcon.graph.preserve.history", "true");
service = new MetadataMappingService();
service.init();
Set<String> vertexPropertyKeys = service.getVertexIndexedKeys();
System.out.println("Got vertex property keys: " + vertexPropertyKeys);
Set<String> edgePropertyKeys = service.getEdgeIndexedKeys();
System.out.println("Got edge property keys: " + edgePropertyKeys);
}
@AfterClass
public void tearDown() throws Exception {
GraphUtils.dump(service.getGraph(), System.out);
cleanUp();
StartupProperties.get().setProperty("falcon.graph.preserve.history", "false");
}
@AfterMethod
public void printGraph() throws Exception {
GraphUtils.dump(service.getGraph());
}
private GraphQuery getQuery() {
return service.getGraph().query();
}
@Test
public void testGetName() throws Exception {
Assert.assertEquals(service.getName(), MetadataMappingService.SERVICE_NAME);
}
@Test
public void testOnAddClusterEntity() throws Exception {
// Get the before vertices and edges
long beforeVerticesCount = getVerticesCount(service.getGraph());
long beforeEdgesCount = getEdgesCount(service.getGraph());
clusterEntity = addClusterEntity(CLUSTER_ENTITY_NAME, COLO_NAME,
"classification=production");
verifyEntityWasAddedToGraph(CLUSTER_ENTITY_NAME, RelationshipType.CLUSTER_ENTITY);
verifyClusterEntityEdges();
// +4 = cluster, colo, tag, user
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 4);
// +3 = cluster to colo, user and tag
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 3);
}
@Test (dependsOnMethods = "testOnAddClusterEntity")
public void testOnAddFeedEntity() throws Exception {
// Get the before vertices and edges
long beforeVerticesCount = getVerticesCount(service.getGraph());
long beforeEdgesCount = getEdgesCount(service.getGraph());
Feed impressionsFeed = addFeedEntity("impression-feed", clusterEntity,
"classified-as=Secure", "analytics", Storage.TYPE.FILESYSTEM,
"/falcon/impression-feed/${YEAR}/${MONTH}/${DAY}");
inputFeeds.add(impressionsFeed);
verifyEntityWasAddedToGraph(impressionsFeed.getName(), RelationshipType.FEED_ENTITY);
verifyFeedEntityEdges(impressionsFeed.getName(), "Secure", "analytics");
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 3); // +3 = feed, tag, group
// (the user vertex already exists from the cluster entity)
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 4); // +4 = cluster, tag, group, user
// Get the before vertices and edges
beforeVerticesCount = getVerticesCount(service.getGraph());
beforeEdgesCount = getEdgesCount(service.getGraph());
Feed clicksFeed = addFeedEntity("clicks-feed", clusterEntity,
"classified-as=Secure,classified-as=Financial", "analytics", Storage.TYPE.FILESYSTEM,
"/falcon/clicks-feed/${YEAR}-${MONTH}-${DAY}");
inputFeeds.add(clicksFeed);
verifyEntityWasAddedToGraph(clicksFeed.getName(), RelationshipType.FEED_ENTITY);
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 2); // +2 = feed + Financial tag vertex
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 5); // +5 = cluster + user + group
// + 2 tags
// Get the before vertices and edges
beforeVerticesCount = getVerticesCount(service.getGraph());
beforeEdgesCount = getEdgesCount(service.getGraph());
Feed join1Feed = addFeedEntity("imp-click-join1", clusterEntity,
"classified-as=Financial", "reporting,bi", Storage.TYPE.FILESYSTEM,
"/falcon/imp-click-join1/${YEAR}${MONTH}${DAY}");
outputFeeds.add(join1Feed);
verifyEntityWasAddedToGraph(join1Feed.getName(), RelationshipType.FEED_ENTITY);
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 3); // +3 = 1 feed + 2 new
// group vertices (reporting, bi)
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 5); // +5 = cluster + user + tag
// + 2 groups
// Get the before vertices and edges
beforeVerticesCount = getVerticesCount(service.getGraph());
beforeEdgesCount = getEdgesCount(service.getGraph());
Feed join2Feed = addFeedEntity("imp-click-join2", clusterEntity,
"classified-as=Secure,classified-as=Financial", "reporting,bi", Storage.TYPE.FILESYSTEM,
"/falcon/imp-click-join2/${YEAR}${MONTH}${DAY}");
outputFeeds.add(join2Feed);
verifyEntityWasAddedToGraph(join2Feed.getName(), RelationshipType.FEED_ENTITY);
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 1); // +1 feed
// +6 = user + 2tags + 2Groups + Cluster
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 6);
}
@Test (dependsOnMethods = "testOnAddFeedEntity")
public void testOnAddProcessEntity() throws Exception {
// Get the before vertices and edges
long beforeVerticesCount = getVerticesCount(service.getGraph());
long beforeEdgesCount = getEdgesCount(service.getGraph());
processEntity = addProcessEntity(PROCESS_ENTITY_NAME, clusterEntity,
"classified-as=Critical", "testPipeline,dataReplication_Pipeline", GENERATE_WORKFLOW_NAME,
WORKFLOW_VERSION, inputFeeds, outputFeeds);
verifyEntityWasAddedToGraph(processEntity.getName(), RelationshipType.PROCESS_ENTITY);
verifyProcessEntityEdges();
// +4 = 1 process + 1 tag + 2 pipeline
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 4);
// +9 = user,tag,cluster, 2 inputs,2 outputs, 2 pipelines
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 9);
}
@Test (dependsOnMethods = "testOnAddProcessEntity")
public void testOnAdd() throws Exception {
verifyEntityGraph(RelationshipType.FEED_ENTITY, "Secure");
}
@Test (dependsOnMethods = "testOnAdd")
// @Test (dependsOnMethods = "testLineageForRetentionWithNoFeedsEvicted")
public void testOnChange() throws Exception {
// shutdown the graph and resurrect for testing
service.destroy();
service.init();
long beforeVerticesCount = getVerticesCount(service.getGraph());
long beforeEdgesCount = getEdgesCount(service.getGraph());
// cannot modify cluster, adding a new cluster
anotherCluster = addClusterEntity("another-cluster", "east-coast",
"classification=another");
verifyEntityWasAddedToGraph("another-cluster", RelationshipType.CLUSTER_ENTITY);
// +3 = cluster, colo, tag (the falcon-user vertex already exists, so it is not counted)
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 3);
// +3 edges to user, colo and new tag
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 3);
}
@Test(dependsOnMethods = "testOnChange")
public void testOnFeedEntityChange() throws Exception {
Feed oldFeed = inputFeeds.get(0);
Feed newFeed = EntityBuilderTestUtil.buildFeed(oldFeed.getName(), clusterEntity,
"classified-as=Secured,source=data-warehouse", "reporting");
addStorage(newFeed, Storage.TYPE.FILESYSTEM,
"jail://global:00/falcon/impression-feed/20140101");
long beforeVerticesCount = 0;
long beforeEdgesCount = 0;
try {
configStore.initiateUpdate(newFeed);
beforeVerticesCount = getVerticesCount(service.getGraph());
beforeEdgesCount = getEdgesCount(service.getGraph());
// add cluster
org.apache.falcon.entity.v0.feed.Cluster feedCluster =
new org.apache.falcon.entity.v0.feed.Cluster();
feedCluster.setName(anotherCluster.getName());
newFeed.getClusters().getClusters().add(feedCluster);
configStore.update(EntityType.FEED, newFeed);
} finally {
configStore.cleanupUpdateInit();
}
verifyUpdatedEdges(newFeed);
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 2); //+2 = 2 new tags
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 2); // +2 = 1 new cluster, 1 new tag
}
@Test(dependsOnMethods = "testOnFeedEntityChange")
public void testOnProcessEntityChange() throws Exception {
long beforeVerticesCount = getVerticesCount(service.getGraph());
long beforeEdgesCount = getEdgesCount(service.getGraph());
Process oldProcess = processEntity;
Process newProcess = EntityBuilderTestUtil.buildProcess(oldProcess.getName(), anotherCluster,
null, null);
EntityBuilderTestUtil.addProcessWorkflow(newProcess, GENERATE_WORKFLOW_NAME, "2.0.0");
EntityBuilderTestUtil.addInput(newProcess, inputFeeds.get(0));
try {
configStore.initiateUpdate(newProcess);
configStore.update(EntityType.PROCESS, newProcess);
} finally {
configStore.cleanupUpdateInit();
}
verifyUpdatedEdges(newProcess);
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 0); // +0 = no new vertices
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount - 6); // -6 = -1 input, -2 outputs,
// -1 tag, -2 pipelines (the cluster edge is replaced, not removed)
}
@Test(dependsOnMethods = "testOnProcessEntityChange")
public void testAreSame() throws Exception {
Inputs inputs1 = new Inputs();
Inputs inputs2 = new Inputs();
Outputs outputs1 = new Outputs();
Outputs outputs2 = new Outputs();
// returns true when both are empty
Assert.assertTrue(EntityRelationshipGraphBuilder.areSame(inputs1, inputs2));
Assert.assertTrue(EntityRelationshipGraphBuilder.areSame(outputs1, outputs2));
Input i1 = new Input();
i1.setName("input1");
Input i2 = new Input();
i2.setName("input2");
Output o1 = new Output();
o1.setName("output1");
Output o2 = new Output();
o2.setName("output2");
inputs1.getInputs().add(i1);
Assert.assertFalse(EntityRelationshipGraphBuilder.areSame(inputs1, inputs2));
outputs1.getOutputs().add(o1);
Assert.assertFalse(EntityRelationshipGraphBuilder.areSame(outputs1, outputs2));
inputs2.getInputs().add(i1);
Assert.assertTrue(EntityRelationshipGraphBuilder.areSame(inputs1, inputs2));
outputs2.getOutputs().add(o1);
Assert.assertTrue(EntityRelationshipGraphBuilder.areSame(outputs1, outputs2));
}
@Test(dependsOnMethods = "testAreSame")
public void testOnClusterEntityChange() throws Exception {
long beforeVerticesCount = getVerticesCount(service.getGraph());
long beforeEdgesCount = getEdgesCount(service.getGraph());
Cluster oldCluster = clusterEntity;
Cluster newCluster = EntityBuilderTestUtil.buildCluster(oldCluster.getName(),
"clusterUpdateColo", oldCluster.getTags() + ",clusterUpdateTagKey=clusterUpdateTagVal");
try {
configStore.initiateUpdate(newCluster);
configStore.update(EntityType.CLUSTER, newCluster);
} finally {
configStore.cleanupUpdateInit();
}
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 2); // +1 new tag +1 new colo
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 1); // +1 new tag edge
Vertex newClusterVertex = getEntityVertex(newCluster.getName(), RelationshipType.CLUSTER_ENTITY);
verifyVertexForEdge(newClusterVertex, Direction.OUT, RelationshipLabel.CLUSTER_COLO.getName(),
"clusterUpdateColo", RelationshipType.COLO.getName());
}
@Test (dependsOnMethods = "testOnClusterEntityChange")
public void testMapLineage() throws Exception {
setup();
// Get the before vertices and edges
long beforeVerticesCount = getVerticesCount(service.getGraph());
long beforeEdgesCount = getEdgesCount(service.getGraph());
WorkflowExecutionContext context = WorkflowExecutionContext.create(getTestMessageArgs(
EntityOperations.GENERATE, GENERATE_WORKFLOW_NAME, null, null, null, null)
, WorkflowExecutionContext.Type.POST_PROCESSING);
service.onSuccess(context);
debug(service.getGraph());
GraphUtils.dump(service.getGraph());
verifyLineageGraph(RelationshipType.FEED_INSTANCE.getName());
// +6 = 1 process instance + 3 input feed instances (2 for impression-feed, 1 for clicks-feed) + 2 output feed instances
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 6);
//+40 = +26 for feed instances + 8 for process instance + 6 for second feed instance
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 40);
}
@Test (dependsOnMethods = "testMapLineage")
public void testLineageForNoDateInFeedPath() throws Exception {
setupForNoDateInFeedPath();
// Get the before vertices and edges
long beforeVerticesCount = getVerticesCount(service.getGraph());
long beforeEdgesCount = getEdgesCount(service.getGraph());
WorkflowExecutionContext context = WorkflowExecutionContext.create(getTestMessageArgs(
EntityOperations.GENERATE, GENERATE_WORKFLOW_NAME, null,
OUTPUT_INSTANCE_PATHS_NO_DATE, INPUT_INSTANCE_PATHS_NO_DATE, null),
WorkflowExecutionContext.Type.POST_PROCESSING);
service.onSuccess(context);
debug(service.getGraph());
GraphUtils.dump(service.getGraph());
// Verify if instance name has nominal time
List<String> feedNamesOwnedByUser = getFeedsOwnedByAUser(
RelationshipType.FEED_INSTANCE.getName());
List<String> expected = Arrays.asList("impression-feed/2014-01-01T01:00Z", "clicks-feed/2014-01-01T01:00Z",
"imp-click-join1/2014-01-01T01:00Z", "imp-click-join2/2014-01-01T01:00Z");
Assert.assertTrue(feedNamesOwnedByUser.containsAll(expected));
// +5 = 1 process, 2 inputs, 2 outputs
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 5);
//+34 = +26 for feed instances + 8 for process instance
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 34);
}
@Test (dependsOnMethods = "testLineageForNoDateInFeedPath")
public void testLineageForReplication() throws Exception {
setupForLineageReplication();
// Get the before vertices and edges
// +7 [primary, bcp cluster] = cluster, colo, tag, user
// +3 [input feed] = feed, tag, group
// +4 [output feed] = 1 feed + 1 tag + 2 groups
// +4 [process] = 1 process + 1 tag + 2 pipeline
// +3 = 1 process, 1 input, 1 output
long beforeVerticesCount = getVerticesCount(service.getGraph());
// +4 [cluster] = cluster to colo and tag [primary and bcp],
// +4 [input feed] = cluster, tag, group, user
// +5 [output feed] = cluster + user + Group + 2Tags
// +7 = user,tag,cluster, 1 input,1 output, 2 pipelines
// +19 = +6 for output feed instances + 7 for process instance + 6 for input feed instance
long beforeEdgesCount = getEdgesCount(service.getGraph());
WorkflowExecutionContext context = WorkflowExecutionContext.create(getTestMessageArgs(
EntityOperations.REPLICATE, REPLICATION_WORKFLOW_NAME, REPLICATED_FEED,
"jail://global:00/falcon/raw-click/bcp/20140101",
"jail://global:00/falcon/raw-click/primary/20140101", REPLICATED_FEED),
WorkflowExecutionContext.Type.POST_PROCESSING);
service.onSuccess(context);
debug(service.getGraph());
GraphUtils.dump(service.getGraph());
verifyLineageGraphForReplicationOrEviction(REPLICATED_FEED,
"jail://global:00/falcon/raw-click/bcp/20140101", context,
RelationshipLabel.FEED_CLUSTER_REPLICATED_EDGE);
// No new vertex added after replication
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 0);
// +1 for replicated-to edge to target cluster for each output feed instance
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 1);
}
@Test (dependsOnMethods = "testLineageForReplication")
public void testLineageForReplicationForNonGeneratedInstances() throws Exception {
cleanUp();
service.init();
addClusterAndFeedForReplication(inputFeeds);
// Get the vertices before running replication WF
long beforeVerticesCount = getVerticesCount(service.getGraph());
long beforeEdgesCount = getEdgesCount(service.getGraph());
WorkflowExecutionContext context = WorkflowExecutionContext.create(getTestMessageArgs(
EntityOperations.REPLICATE, REPLICATION_WORKFLOW_NAME, REPLICATED_FEED,
"jail://global:00/falcon/raw-click/bcp/20140101",
"jail://global:00/falcon/raw-click/primary/20140101", REPLICATED_FEED),
WorkflowExecutionContext.Type.POST_PROCESSING);
service.onSuccess(context);
debug(service.getGraph());
GraphUtils.dump(service.getGraph());
verifyFeedEntityEdges(REPLICATED_FEED, "Secure", "analytics");
verifyLineageGraphForReplicationOrEviction(REPLICATED_FEED,
"jail://global:00/falcon/raw-click/bcp/20140101", context,
RelationshipLabel.FEED_CLUSTER_REPLICATED_EDGE);
// +1 for the new instance vertex added
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 1);
// +6 = instance-of, stored-in, owned-by, classification, group, replicated-to
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 6);
}
@Test (dependsOnMethods = "testLineageForReplicationForNonGeneratedInstances")
public void testLineageForRetention() throws Exception {
setupForLineageEviction();
// Get the before vertices and edges
long beforeVerticesCount = getVerticesCount(service.getGraph());
long beforeEdgesCount = getEdgesCount(service.getGraph());
WorkflowExecutionContext context = WorkflowExecutionContext.create(getTestMessageArgs(
EntityOperations.DELETE, EVICTION_WORKFLOW_NAME,
EVICTED_FEED, EVICTED_INSTANCE_PATHS, "IGNORE", EVICTED_FEED),
WorkflowExecutionContext.Type.POST_PROCESSING);
service.onSuccess(context);
debug(service.getGraph());
GraphUtils.dump(service.getGraph());
List<String> expectedFeeds = Arrays.asList("impression-feed/2014-01-01T00:00Z", "clicks-feed/2014-01-01T00:00Z",
"imp-click-join1/2014-01-01T00:00Z", "imp-click-join1/2014-01-02T00:00Z");
List<String> secureFeeds = Arrays.asList("impression-feed/2014-01-01T00:00Z",
"clicks-feed/2014-01-01T00:00Z");
List<String> ownedAndSecureFeeds = Arrays.asList("clicks-feed/2014-01-01T00:00Z",
"imp-click-join1/2014-01-01T00:00Z", "imp-click-join1/2014-01-02T00:00Z");
verifyLineageGraph(RelationshipType.FEED_INSTANCE.getName(), expectedFeeds, secureFeeds, ownedAndSecureFeeds);
String[] paths = EVICTED_INSTANCE_PATHS.split(EvictedInstanceSerDe.INSTANCEPATH_SEPARATOR);
for (String feedInstanceDataPath : paths) {
verifyLineageGraphForReplicationOrEviction(EVICTED_FEED, feedInstanceDataPath, context,
RelationshipLabel.FEED_CLUSTER_EVICTED_EDGE);
}
// No new vertices added
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount + 0);
// +2 for evicted-from edge from Feed Instance vertex to cluster
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount + 2);
}
@Test (dependsOnMethods = "testLineageForRetention")
public void testLineageForRetentionWithNoFeedsEvicted() throws Exception {
cleanUp();
service.init();
long beforeVerticesCount = getVerticesCount(service.getGraph());
long beforeEdgesCount = getEdgesCount(service.getGraph());
WorkflowExecutionContext context = WorkflowExecutionContext.create(getTestMessageArgs(
EntityOperations.DELETE, EVICTION_WORKFLOW_NAME,
EVICTED_FEED, "IGNORE", "IGNORE", EVICTED_FEED),
WorkflowExecutionContext.Type.POST_PROCESSING);
service.onSuccess(context);
debug(service.getGraph());
GraphUtils.dump(service.getGraph());
// No new vertices added
Assert.assertEquals(getVerticesCount(service.getGraph()), beforeVerticesCount);
// No new edges added
Assert.assertEquals(getEdgesCount(service.getGraph()), beforeEdgesCount);
}
private String printProps(Vertex vertex) {
StringBuilder sb = new StringBuilder();
for (String p : vertex.getPropertyKeys()) {
Object value = vertex.getProperty(p);
sb.append(p).append("->").append(value).append(";");
}
return sb.toString();
}
@Test (dependsOnMethods = "testLineageForRetentionWithNoFeedsEvicted")
public void testLineageForTransactionFailure() throws Exception {
cleanUp();
service.init();
clusterEntity = addClusterEntity(CLUSTER_ENTITY_NAME, COLO_NAME,
"classification=production");
verifyEntityWasAddedToGraph(CLUSTER_ENTITY_NAME, RelationshipType.CLUSTER_ENTITY);
verifyClusterEntityEdges();
Assert.assertEquals(getVerticesCount(service.getGraph()), 4); // 4 = cluster, colo, user, tag
Assert.assertEquals(getEdgesCount(service.getGraph()), 3); // 3 = cluster to colo, user and tag
Feed feed = EntityBuilderTestUtil.buildFeed("feed-name", new Cluster[]{clusterEntity}, null, null);
inputFeeds.add(feed);
outputFeeds.add(feed);
try {
processEntity = addProcessEntity(PROCESS_ENTITY_NAME, clusterEntity,
"classified-as=Critical", "testPipeline,dataReplication_Pipeline", GENERATE_WORKFLOW_NAME,
WORKFLOW_VERSION, inputFeeds, outputFeeds);
Assert.fail("Expected process addition to fail and the transaction to roll back");
} catch (FalconException e) {
Assert.assertEquals(getVerticesCount(service.getGraph()), 4);
Assert.assertEquals(getEdgesCount(service.getGraph()), 3);
}
}
private void verifyUpdatedEdges(Feed newFeed) {
Vertex feedVertex = getEntityVertex(newFeed.getName(), RelationshipType.FEED_ENTITY);
// groups
Edge edge = feedVertex.getEdges(Direction.OUT, RelationshipLabel.GROUPS.getName()).iterator().next();
Assert.assertEquals(edge.getVertex(Direction.IN).getProperty("name"), "reporting");
// tags
edge = feedVertex.getEdges(Direction.OUT, "classified-as").iterator().next();
Assert.assertEquals(edge.getVertex(Direction.IN).getProperty("name"), "Secured");
edge = feedVertex.getEdges(Direction.OUT, "source").iterator().next();
Assert.assertEquals(edge.getVertex(Direction.IN).getProperty("name"), "data-warehouse");
// new cluster
List<String> actual = new ArrayList<>();
for (Edge clusterEdge : feedVertex.getEdges(Direction.OUT, RelationshipLabel.FEED_CLUSTER_EDGE.getName())) {
actual.add(clusterEdge.getVertex(Direction.IN).<String>getProperty("name"));
}
Assert.assertTrue(actual.containsAll(Arrays.asList("primary-cluster", "another-cluster")),
"Actual does not contain expected: " + actual);
}
@Test (dependsOnMethods = "testLineageForRetentionWithNoFeedsEvicted")
public void testLineageForJobCounter() throws Exception {
setupForJobCounters();
WorkflowExecutionContext context = WorkflowExecutionContext.create(getTestMessageArgs(
EntityOperations.GENERATE, GENERATE_WORKFLOW_NAME, "IGNORE", "IGNORE", "IGNORE", "NONE"),
WorkflowExecutionContext.Type.POST_PROCESSING);
service.onSuccess(context);
debug(service.getGraph());
GraphUtils.dump(service.getGraph());
Graph graph = service.getGraph();
Vertex vertex = graph.getVertices("name", "sample-process/2014-01-01T01:00Z").iterator().next();
Assert.assertEquals(vertex.getProperty("TIMETAKEN"), 36956L);
Assert.assertEquals(vertex.getProperty("COPY"), 30L);
Assert.assertEquals(vertex.getProperty("BYTESCOPIED"), 1000L);
Assert.assertEquals(getVerticesCount(service.getGraph()), 9);
Assert.assertEquals(getEdgesCount(service.getGraph()), 14);
verifyLineageGraphForJobCounters(context);
}
private void verifyUpdatedEdges(Process newProcess) {
Vertex processVertex = getEntityVertex(newProcess.getName(), RelationshipType.PROCESS_ENTITY);
// cluster
Edge edge = processVertex.getEdges(Direction.OUT,
RelationshipLabel.PROCESS_CLUSTER_EDGE.getName()).iterator().next();
Assert.assertEquals(edge.getVertex(Direction.IN).getProperty("name"), anotherCluster.getName());
// inputs
edge = processVertex.getEdges(Direction.IN, RelationshipLabel.FEED_PROCESS_EDGE.getName()).iterator().next();
Assert.assertEquals(edge.getVertex(Direction.OUT).getProperty("name"),
newProcess.getInputs().getInputs().get(0).getFeed());
// outputs
for (Edge e : processVertex.getEdges(Direction.OUT, RelationshipLabel.PROCESS_FEED_EDGE.getName())) {
Assert.fail("there should not be any edges to output feeds" + e);
}
}
public static void debug(final Graph graph) {
System.out.println("*****Vertices of " + graph);
for (Vertex vertex : graph.getVertices()) {
System.out.println(GraphUtils.vertexString(vertex));
}
System.out.println("*****Edges of " + graph);
for (Edge edge : graph.getEdges()) {
System.out.println(GraphUtils.edgeString(edge));
}
}
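/**
* Builds a cluster entity via EntityBuilderTestUtil and publishes it to the configuration store.
*/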
private Cluster addClusterEntity(String name, String colo, String tags) throws Exception {
Cluster cluster = EntityBuilderTestUtil.buildCluster(name, colo, tags);
configStore.publish(EntityType.CLUSTER, cluster);
return cluster;
}
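/**
* Convenience overload that wraps the single cluster in an array and delegates to the multi-cluster variant.
*/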
private Feed addFeedEntity(String feedName, Cluster cluster, String tags, String groups,
Storage.TYPE storageType, String uriTemplate) throws Exception {
return addFeedEntity(feedName, new Cluster[]{cluster}, tags, groups, storageType, uriTemplate);
}
private Feed addFeedEntity(String feedName, Cluster[] clusters, String tags, String groups,
Storage.TYPE storageType, String uriTemplate) throws Exception {
Feed feed = EntityBuilderTestUtil.buildFeed(feedName, clusters,
tags, groups);
addStorage(feed, storageType, uriTemplate);
for (org.apache.falcon.entity.v0.feed.Cluster feedCluster : feed.getClusters().getClusters()) {
if (feedCluster.getName().equals(BCP_CLUSTER_ENTITY_NAME)) {
feedCluster.setType(ClusterType.TARGET);
}
}
configStore.publish(EntityType.FEED, feed);
return feed;
}
//SUSPEND CHECKSTYLE CHECK ParameterNumberCheck
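/**
* Builds a process entity with the given tags, pipelines and workflow, wires in the input
* and output feeds, and publishes it to the configuration store.
*/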
public Process addProcessEntity(String processName, Cluster cluster,
String tags, String pipelineTags, String workflowName,
String version, List<Feed> inFeeds, List<Feed> outFeeds) throws Exception {
Process process = EntityBuilderTestUtil.buildProcess(processName, cluster,
tags, pipelineTags);
EntityBuilderTestUtil.addProcessWorkflow(process, workflowName, version);
for (Feed inputFeed : inFeeds) {
EntityBuilderTestUtil.addInput(process, inputFeed);
}
for (Feed outputFeed : outFeeds) {
EntityBuilderTestUtil.addOutput(process, outputFeed);
}
configStore.publish(EntityType.PROCESS, process);
return process;
}
//RESUME CHECKSTYLE CHECK ParameterNumberCheck
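// Attaches storage to the feed: a DATA location for FILESYSTEM storage, otherwise a catalog table URI.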
private static void addStorage(Feed feed, Storage.TYPE storageType, String uriTemplate) {
if (storageType == Storage.TYPE.FILESYSTEM) {
Locations locations = new Locations();
feed.setLocations(locations);
Location location = new Location();
location.setType(LocationType.DATA);
location.setPath(uriTemplate);
feed.getLocations().getLocations().add(location);
} else {
CatalogTable table = new CatalogTable();
table.setUri(uriTemplate);
feed.setTable(table);
}
}
private static void addStorage(org.apache.falcon.entity.v0.feed.Cluster cluster, Feed feed,
Storage.TYPE storageType, String uriTemplate) {
if (storageType == Storage.TYPE.FILESYSTEM) {
Locations locations = new Locations();
feed.setLocations(locations);
Location location = new Location();
location.setType(LocationType.DATA);
location.setPath(uriTemplate);
cluster.setLocations(new Locations());
cluster.getLocations().getLocations().add(location);
} else {
CatalogTable table = new CatalogTable();
table.setUri(uriTemplate);
cluster.setTable(table);
}
}
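// Asserts that a vertex exists for the entity and that its name, type and timestamp properties are set.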
private void verifyEntityWasAddedToGraph(String entityName, RelationshipType entityType) {
Vertex entityVertex = getEntityVertex(entityName, entityType);
Assert.assertNotNull(entityVertex);
verifyEntityProperties(entityVertex, entityName, entityType);
}
private void verifyEntityProperties(Vertex entityVertex, String entityName, RelationshipType entityType) {
Assert.assertEquals(entityName, entityVertex.getProperty(RelationshipProperty.NAME.getName()));
Assert.assertEquals(entityType.getName(), entityVertex.getProperty(RelationshipProperty.TYPE.getName()));
Assert.assertNotNull(entityVertex.getProperty(RelationshipProperty.TIMESTAMP.getName()));
}
private void verifyClusterEntityEdges() {
Vertex clusterVertex = getEntityVertex(CLUSTER_ENTITY_NAME,
RelationshipType.CLUSTER_ENTITY);
// verify edge to user vertex
verifyVertexForEdge(clusterVertex, Direction.OUT, RelationshipLabel.USER.getName(),
FALCON_USER, RelationshipType.USER.getName());
// verify edge to colo vertex
verifyVertexForEdge(clusterVertex, Direction.OUT, RelationshipLabel.CLUSTER_COLO.getName(),
COLO_NAME, RelationshipType.COLO.getName());
// verify edge to tags vertex
verifyVertexForEdge(clusterVertex, Direction.OUT, "classification",
"production", RelationshipType.TAGS.getName());
}
private void verifyFeedEntityEdges(String feedName, String tag, String group) {
Vertex feedVertex = getEntityVertex(feedName, RelationshipType.FEED_ENTITY);
// verify edge to cluster vertex
verifyVertexForEdge(feedVertex, Direction.OUT, RelationshipLabel.FEED_CLUSTER_EDGE.getName(),
CLUSTER_ENTITY_NAME, RelationshipType.CLUSTER_ENTITY.getName());
// verify edge to user vertex
verifyVertexForEdge(feedVertex, Direction.OUT, RelationshipLabel.USER.getName(),
FALCON_USER, RelationshipType.USER.getName());
// verify edge to tags vertex
verifyVertexForEdge(feedVertex, Direction.OUT, "classified-as",
tag, RelationshipType.TAGS.getName());
// verify edge to group vertex
verifyVertexForEdge(feedVertex, Direction.OUT, RelationshipLabel.GROUPS.getName(),
group, RelationshipType.GROUPS.getName());
}
private void verifyProcessEntityEdges() {
Vertex processVertex = getEntityVertex(PROCESS_ENTITY_NAME,
RelationshipType.PROCESS_ENTITY);
// verify edge to cluster vertex
verifyVertexForEdge(processVertex, Direction.OUT, RelationshipLabel.PROCESS_CLUSTER_EDGE.getName(),
CLUSTER_ENTITY_NAME, RelationshipType.CLUSTER_ENTITY.getName());
// verify edge to user vertex
verifyVertexForEdge(processVertex, Direction.OUT, RelationshipLabel.USER.getName(),
FALCON_USER, RelationshipType.USER.getName());
// verify edge to tags vertex
verifyVertexForEdge(processVertex, Direction.OUT, "classified-as",
"Critical", RelationshipType.TAGS.getName());
// verify edges to inputs
List<String> actual = new ArrayList<>();
for (Edge edge : processVertex.getEdges(Direction.IN,
RelationshipLabel.FEED_PROCESS_EDGE.getName())) {
Vertex outVertex = edge.getVertex(Direction.OUT);
Assert.assertEquals(RelationshipType.FEED_ENTITY.getName(),
outVertex.getProperty(RelationshipProperty.TYPE.getName()));
actual.add(outVertex.<String>getProperty(RelationshipProperty.NAME.getName()));
}
Assert.assertTrue(actual.containsAll(Arrays.asList("impression-feed", "clicks-feed")),
"Actual does not contain expected: " + actual);
actual.clear();
// verify edges to outputs
for (Edge edge : processVertex.getEdges(Direction.OUT,
RelationshipLabel.PROCESS_FEED_EDGE.getName())) {
Vertex outVertex = edge.getVertex(Direction.IN);
Assert.assertEquals(RelationshipType.FEED_ENTITY.getName(),
outVertex.getProperty(RelationshipProperty.TYPE.getName()));
actual.add(outVertex.<String>getProperty(RelationshipProperty.NAME.getName()));
}
Assert.assertTrue(actual.containsAll(Arrays.asList("imp-click-join1", "imp-click-join2")),
"Actual does not contain expected: " + actual);
}
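/**
* Queries the graph for the vertex with the given entity name and relationship type and asserts one is found.
*/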
private Vertex getEntityVertex(String entityName, RelationshipType entityType) {
GraphQuery entityQuery = getQuery()
.has(RelationshipProperty.NAME.getName(), entityName)
.has(RelationshipProperty.TYPE.getName(), entityType.getName());
Iterator<Vertex> iterator = entityQuery.vertices().iterator();
Assert.assertTrue(iterator.hasNext());
Vertex entityVertex = iterator.next();
Assert.assertNotNull(entityVertex);
return entityVertex;
}
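// Asserts that at least one edge with the given label exists from the vertex in the given direction,
// and that the vertex at its IN end has the expected name and type.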
private void verifyVertexForEdge(Vertex fromVertex, Direction direction, String label,
String expectedName, String expectedType) {
boolean found = false;
for (Edge edge : fromVertex.getEdges(direction, label)) {
found = true;
Vertex toVertex = edge.getVertex(Direction.IN);
Assert.assertEquals(
toVertex.getProperty(RelationshipProperty.NAME.getName()), expectedName);
Assert.assertEquals(
toVertex.getProperty(RelationshipProperty.TYPE.getName()), expectedType);
}
Assert.assertTrue(found, "Edge not found");
}
private void verifyEntityGraph(RelationshipType feedType, String classification) {
// feeds owned by a user
List<String> feedNamesOwnedByUser = getFeedsOwnedByAUser(feedType.getName());
Assert.assertEquals(feedNamesOwnedByUser,
Arrays.asList("impression-feed", "clicks-feed", "imp-click-join1",
"imp-click-join2")
);
// feeds classified as secure
verifyFeedsClassifiedAsSecure(feedType.getName(),
Arrays.asList("impression-feed", "clicks-feed", "imp-click-join2"));
// feeds owned by a user and classified as secure
verifyFeedsOwnedByUserAndClassification(feedType.getName(), classification,
Arrays.asList("impression-feed", "clicks-feed", "imp-click-join2"));
}
private List<String> getFeedsOwnedByAUser(String feedType) {
GraphQuery userQuery = getQuery()
.has(RelationshipProperty.NAME.getName(), FALCON_USER)
.has(RelationshipProperty.TYPE.getName(), RelationshipType.USER.getName());
List<String> feedNames = new ArrayList<>();
for (Vertex userVertex : userQuery.vertices()) {
for (Vertex feed : userVertex.getVertices(Direction.IN, RelationshipLabel.USER.getName())) {
if (feed.getProperty(RelationshipProperty.TYPE.getName()).equals(feedType)) {
System.out.println(FALCON_USER + " owns -> " + GraphUtils.vertexString(feed));
feedNames.add(feed.<String>getProperty(RelationshipProperty.NAME.getName()));
}
}
}
return feedNames;
}
private void verifyFeedsClassifiedAsSecure(String feedType, List<String> expected) {
GraphQuery classQuery = getQuery()
.has(RelationshipProperty.NAME.getName(), "Secure")
.has(RelationshipProperty.TYPE.getName(), RelationshipType.TAGS.getName());
List<String> actual = new ArrayList<>();
for (Vertex feedVertex : classQuery.vertices()) {
for (Vertex feed : feedVertex.getVertices(Direction.BOTH, "classified-as")) {
if (feed.getProperty(RelationshipProperty.TYPE.getName()).equals(feedType)) {
System.out.println(" Secure classification -> " + GraphUtils.vertexString(feed));
actual.add(feed.<String>getProperty(RelationshipProperty.NAME.getName()));
}
}
}
Assert.assertTrue(actual.containsAll(expected), "Actual does not contain expected: " + actual);
}
private void verifyFeedsOwnedByUserAndClassification(String feedType, String classification,
List<String> expected) {
List<String> actual = new ArrayList<>();
Vertex userVertex = getEntityVertex(FALCON_USER, RelationshipType.USER);
for (Vertex feed : userVertex.getVertices(Direction.IN, RelationshipLabel.USER.getName())) {
if (feed.getProperty(RelationshipProperty.TYPE.getName()).equals(feedType)) {
for (Vertex classVertex : feed.getVertices(Direction.OUT, "classified-as")) {
if (classVertex.getProperty(RelationshipProperty.NAME.getName())
.equals(classification)) {
actual.add(feed.<String>getProperty(RelationshipProperty.NAME.getName()));
System.out.println(classification + " feed owned by falcon-user -> "
+ GraphUtils.vertexString(feed));
}
}
}
}
Assert.assertTrue(actual.containsAll(expected),
"Actual does not contain expected: " + actual);
}
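/**
* Counts vertices by iterating over graph.getVertices(), since the Blueprints Graph API exposes no count.
*/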
public long getVerticesCount(final Graph graph) {
long count = 0;
for (Vertex ignored : graph.getVertices()) {
count++;
}
return count;
}
public long getEdgesCount(final Graph graph) {
long count = 0;
for (Edge ignored : graph.getEdges()) {
count++;
}
return count;
}
private void verifyLineageGraph(String feedType) {
List<String> expectedFeeds = Arrays.asList("impression-feed/2014-01-01T00:00Z", "clicks-feed/2014-01-01T00:00Z",
"imp-click-join1/2014-01-01T00:00Z", "imp-click-join2/2014-01-01T00:00Z");
List<String> secureFeeds = Arrays.asList("impression-feed/2014-01-01T00:00Z",
"clicks-feed/2014-01-01T00:00Z", "imp-click-join2/2014-01-01T00:00Z");
List<String> ownedAndSecureFeeds = Arrays.asList("clicks-feed/2014-01-01T00:00Z",
"imp-click-join1/2014-01-01T00:00Z", "imp-click-join2/2014-01-01T00:00Z");
verifyLineageGraph(feedType, expectedFeeds, secureFeeds, ownedAndSecureFeeds);
}
private void verifyLineageGraph(String feedType, List<String> expectedFeeds,
List<String> secureFeeds, List<String> ownedAndSecureFeeds) {
// feeds owned by a user
List<String> feedNamesOwnedByUser = getFeedsOwnedByAUser(feedType);
Assert.assertTrue(feedNamesOwnedByUser.containsAll(expectedFeeds));
Graph graph = service.getGraph();
Iterator<Vertex> vertices = graph.getVertices("name", "impression-feed/2014-01-01T00:00Z").iterator();
Assert.assertTrue(vertices.hasNext());
Vertex feedInstanceVertex = vertices.next();
Assert.assertEquals(feedInstanceVertex.getProperty(RelationshipProperty.TYPE.getName()),
RelationshipType.FEED_INSTANCE.getName());
Object vertexId = feedInstanceVertex.getId();
Vertex vertexById = graph.getVertex(vertexId);
Assert.assertEquals(vertexById, feedInstanceVertex);
// feeds classified as secure
verifyFeedsClassifiedAsSecure(feedType, secureFeeds);
// feeds owned by a user and classified as secure
verifyFeedsOwnedByUserAndClassification(feedType, "Financial", ownedAndSecureFeeds);
}
private void verifyLineageGraphForReplicationOrEviction(String feedName, String feedInstanceDataPath,
WorkflowExecutionContext context,
RelationshipLabel edgeLabel) throws Exception {
String feedInstanceName = InstanceRelationshipGraphBuilder.getFeedInstanceName(feedName
, context.getClusterName(), feedInstanceDataPath, context.getNominalTimeAsISO8601());
Vertex feedVertex = getEntityVertex(feedInstanceName, RelationshipType.FEED_INSTANCE);
Edge edge = feedVertex.getEdges(Direction.OUT, edgeLabel.getName())
.iterator().next();
Assert.assertNotNull(edge);
Assert.assertEquals(edge.getProperty(RelationshipProperty.TIMESTAMP.getName())
, context.getTimeStampAsISO8601());
Vertex clusterVertex = edge.getVertex(Direction.IN);
Assert.assertEquals(clusterVertex.getProperty(RelationshipProperty.NAME.getName()), context.getClusterName());
}
private void verifyLineageGraphForJobCounters(WorkflowExecutionContext context) throws Exception {
Vertex processVertex = getEntityVertex(PROCESS_ENTITY_NAME,
RelationshipType.PROCESS_ENTITY);
Assert.assertEquals(processVertex.getProperty("name"), PROCESS_ENTITY_NAME);
Assert.assertTrue(context.getCounters().length() > 0);
}
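/**
* Builds the argument list used to create a WorkflowExecutionContext simulating a post-processing
* workflow message; null feed name/path arguments fall back to the default constants defined above.
*/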
private static String[] getTestMessageArgs(EntityOperations operation, String wfName, String outputFeedNames,
String feedInstancePaths, String falconInputPaths,
String falconInputFeeds) {
String cluster;
if (EntityOperations.REPLICATE == operation) {
cluster = BCP_CLUSTER_ENTITY_NAME + WorkflowExecutionContext.CLUSTER_NAME_SEPARATOR + CLUSTER_ENTITY_NAME;
} else {
cluster = CLUSTER_ENTITY_NAME;
}
return new String[]{
"-" + WorkflowExecutionArgs.CLUSTER_NAME.getName(), cluster,
"-" + WorkflowExecutionArgs.ENTITY_TYPE.getName(), ("process"),
"-" + WorkflowExecutionArgs.ENTITY_NAME.getName(), PROCESS_ENTITY_NAME,
"-" + WorkflowExecutionArgs.NOMINAL_TIME.getName(), NOMINAL_TIME,
"-" + WorkflowExecutionArgs.OPERATION.getName(), operation.toString(),
"-" + WorkflowExecutionArgs.INPUT_FEED_NAMES.getName(),
(falconInputFeeds != null ? falconInputFeeds : INPUT_FEED_NAMES),
"-" + WorkflowExecutionArgs.INPUT_FEED_PATHS.getName(),
(falconInputPaths != null ? falconInputPaths : INPUT_INSTANCE_PATHS),
"-" + WorkflowExecutionArgs.OUTPUT_FEED_NAMES.getName(),
(outputFeedNames != null ? outputFeedNames : OUTPUT_FEED_NAMES),
"-" + WorkflowExecutionArgs.OUTPUT_FEED_PATHS.getName(),
(feedInstancePaths != null ? feedInstancePaths : OUTPUT_INSTANCE_PATHS),
"-" + WorkflowExecutionArgs.WORKFLOW_ID.getName(), "workflow-01-00",
"-" + WorkflowExecutionArgs.WORKFLOW_USER.getName(), FALCON_USER,
"-" + WorkflowExecutionArgs.RUN_ID.getName(), "1",
"-" + WorkflowExecutionArgs.STATUS.getName(), "SUCCEEDED",
"-" + WorkflowExecutionArgs.TIMESTAMP.getName(), NOMINAL_TIME,
"-" + WorkflowExecutionArgs.WF_ENGINE_URL.getName(), "http://localhost:11000/oozie",
"-" + WorkflowExecutionArgs.USER_SUBFLOW_ID.getName(), "userflow@wf-id",
"-" + WorkflowExecutionArgs.USER_WORKFLOW_NAME.getName(), wfName,
"-" + WorkflowExecutionArgs.USER_WORKFLOW_VERSION.getName(), WORKFLOW_VERSION,
"-" + WorkflowExecutionArgs.USER_WORKFLOW_ENGINE.getName(), EngineType.PIG.name(),
"-" + WorkflowExecutionArgs.BRKR_IMPL_CLASS.getName(), BROKER,
"-" + WorkflowExecutionArgs.BRKR_URL.getName(), "tcp://localhost:61616?daemon=true",
"-" + WorkflowExecutionArgs.USER_BRKR_IMPL_CLASS.getName(), BROKER,
"-" + WorkflowExecutionArgs.USER_BRKR_URL.getName(), "tcp://localhost:61616?daemon=true",
"-" + WorkflowExecutionArgs.BRKR_TTL.getName(), "1000",
"-" + WorkflowExecutionArgs.LOG_DIR.getName(), LOGS_DIR,
};
}
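// Resets the stores, re-adds the primary cluster, writes the counters file and publishes a process
// with no input or output feeds.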
private void setupForJobCounters() throws Exception {
cleanUp();
service.init();
// Add cluster
clusterEntity = addClusterEntity(CLUSTER_ENTITY_NAME, COLO_NAME,
"classification=production");
List<Feed> inFeeds = new ArrayList<>();
List<Feed> outFeeds = new ArrayList<>();
createJobCountersFileForTest();
// Add process
processEntity = addProcessEntity(PROCESS_ENTITY_NAME, clusterEntity,
"classified-as=Critical", "testPipeline,dataReplication_Pipeline", GENERATE_WORKFLOW_NAME,
WORKFLOW_VERSION, inFeeds, outFeeds);
}
private void createJobCountersFileForTest() throws Exception {
Path counterFile = new Path(LOGS_DIR, "counter.txt");
FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(
new Path(LOGS_DIR).toUri());
OutputStream out = null;
try {
out = fs.create(counterFile);
out.write(COUNTERS.getBytes());
out.flush();
} finally {
if (out != null) {
out.close();
}
}
}
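// Resets both stores and re-creates the cluster, input/output feeds and process used by the lineage tests.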
private void setup() throws Exception {
cleanUp();
service.init();
// Add cluster
clusterEntity = addClusterEntity(CLUSTER_ENTITY_NAME, COLO_NAME,
"classification=production");
addFeedsAndProcess(clusterEntity);
}
private void addFeedsAndProcess(Cluster cluster) throws Exception {
// Add input and output feeds
Feed impressionsFeed = addFeedEntity("impression-feed", cluster,
"classified-as=Secure", "analytics", Storage.TYPE.FILESYSTEM,
"/falcon/impression-feed/${YEAR}/${MONTH}/${DAY}");
List<Feed> inFeeds = new ArrayList<>();
List<Feed> outFeeds = new ArrayList<>();
inFeeds.add(impressionsFeed);
Feed clicksFeed = addFeedEntity("clicks-feed", cluster,
"classified-as=Secure,classified-as=Financial", "analytics", Storage.TYPE.FILESYSTEM,
"/falcon/clicks-feed/${YEAR}-${MONTH}-${DAY}");
inFeeds.add(clicksFeed);
Feed join1Feed = addFeedEntity("imp-click-join1", cluster,
"classified-as=Financial", "reporting,bi", Storage.TYPE.FILESYSTEM,
"/falcon/imp-click-join1/${YEAR}${MONTH}${DAY}");
outFeeds.add(join1Feed);
Feed join2Feed = addFeedEntity("imp-click-join2", cluster,
"classified-as=Secure,classified-as=Financial", "reporting,bi", Storage.TYPE.FILESYSTEM,
"/falcon/imp-click-join2/${YEAR}${MONTH}${DAY}");
outFeeds.add(join2Feed);
processEntity = addProcessEntity(PROCESS_ENTITY_NAME, clusterEntity,
"classified-as=Critical", "testPipeline,dataReplication_Pipeline", GENERATE_WORKFLOW_NAME,
WORKFLOW_VERSION, inFeeds, outFeeds);
}
private void setupForLineageReplication() throws Exception {
cleanUp();
service.init();
List<Feed> inFeeds = new ArrayList<>();
List<Feed> outFeeds = new ArrayList<>();
addClusterAndFeedForReplication(inFeeds);
// Add output feed
Feed join1Feed = addFeedEntity("imp-click-join1", clusterEntity,
"classified-as=Financial", "reporting,bi", Storage.TYPE.FILESYSTEM,
"/falcon/imp-click-join1/${YEAR}${MONTH}${DAY}");
outFeeds.add(join1Feed);
processEntity = addProcessEntity(PROCESS_ENTITY_NAME, clusterEntity,
"classified-as=Critical", "testPipeline,dataReplication_Pipeline", GENERATE_WORKFLOW_NAME,
WORKFLOW_VERSION, inFeeds, outFeeds);
// GENERATE WF should have run before this to create all instance related vertices
WorkflowExecutionContext context = WorkflowExecutionContext.create(getTestMessageArgs(
EntityOperations.GENERATE, GENERATE_WORKFLOW_NAME, "imp-click-join1",
"jail://global:00/falcon/imp-click-join1/20140101",
"jail://global:00/falcon/raw-click/primary/20140101",
REPLICATED_FEED), WorkflowExecutionContext.Type.POST_PROCESSING);
service.onSuccess(context);
}
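// Adds the primary and BCP clusters plus the replicated raw-click feed with per-cluster storage
// locations, and updates the config store.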
private void addClusterAndFeedForReplication(List<Feed> inFeeds) throws Exception {
// Add cluster
clusterEntity = addClusterEntity(CLUSTER_ENTITY_NAME, COLO_NAME,
"classification=production");
// Add backup cluster
Cluster bcpCluster = addClusterEntity(BCP_CLUSTER_ENTITY_NAME, "east-coast", "classification=bcp");
Cluster[] clusters = {clusterEntity, bcpCluster};
// Add feed
Feed rawFeed = addFeedEntity(REPLICATED_FEED, clusters,
"classified-as=Secure", "analytics", Storage.TYPE.FILESYSTEM,
"/falcon/raw-click/${YEAR}/${MONTH}/${DAY}");
// Add uri template for each cluster
for (org.apache.falcon.entity.v0.feed.Cluster feedCluster : rawFeed.getClusters().getClusters()) {
if (feedCluster.getName().equals(CLUSTER_ENTITY_NAME)) {
addStorage(feedCluster, rawFeed, Storage.TYPE.FILESYSTEM,
"/falcon/raw-click/primary/${YEAR}/${MONTH}/${DAY}");
} else {
addStorage(feedCluster, rawFeed, Storage.TYPE.FILESYSTEM,
"/falcon/raw-click/bcp/${YEAR}/${MONTH}/${DAY}");
}
}
// update config store
try {
configStore.initiateUpdate(rawFeed);
configStore.update(EntityType.FEED, rawFeed);
} finally {
configStore.cleanupUpdateInit();
}
inFeeds.add(rawFeed);
}
private void setupForLineageEviction() throws Exception {
setup();
// GENERATE WF should have run before this to create all instance related vertices
WorkflowExecutionContext context = WorkflowExecutionContext.create(getTestMessageArgs(
EntityOperations.GENERATE, GENERATE_WORKFLOW_NAME,
"imp-click-join1,imp-click-join1", EVICTED_INSTANCE_PATHS, null, null),
WorkflowExecutionContext.Type.POST_PROCESSING);
service.onSuccess(context);
}
private void setupForNoDateInFeedPath() throws Exception {
cleanUp();
service.init();
// Add cluster
clusterEntity = addClusterEntity(CLUSTER_ENTITY_NAME, COLO_NAME,
"classification=production");
List<Feed> inFeeds = new ArrayList<>();
List<Feed> outFeeds = new ArrayList<>();
// Add input and output feeds
Feed impressionsFeed = addFeedEntity("impression-feed", clusterEntity,
"classified-as=Secure", "analytics", Storage.TYPE.FILESYSTEM,
"/falcon/impression-feed");
inFeeds.add(impressionsFeed);
Feed clicksFeed = addFeedEntity("clicks-feed", clusterEntity,
"classified-as=Secure,classified-as=Financial", "analytics", Storage.TYPE.FILESYSTEM,
"/falcon/clicks-feed");
inFeeds.add(clicksFeed);
Feed join1Feed = addFeedEntity("imp-click-join1", clusterEntity,
"classified-as=Financial", "reporting,bi", Storage.TYPE.FILESYSTEM,
"/falcon/imp-click-join1");
outFeeds.add(join1Feed);
Feed join2Feed = addFeedEntity("imp-click-join2", clusterEntity,
"classified-as=Secure,classified-as=Financial", "reporting,bi", Storage.TYPE.FILESYSTEM,
"/falcon/imp-click-join2");
outFeeds.add(join2Feed);
processEntity = addProcessEntity(PROCESS_ENTITY_NAME, clusterEntity,
"classified-as=Critical", "testPipeline,dataReplication_Pipeline", GENERATE_WORKFLOW_NAME,
WORKFLOW_VERSION, inFeeds, outFeeds);
}
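/**
* Clears the graph store and the configuration store, then destroys the service.
*/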
private void cleanUp() throws Exception {
cleanupGraphStore(service.getGraph());
cleanupConfigurationStore(configStore);
service.destroy();
}
private void cleanupGraphStore(Graph graph) {
for (Edge edge : graph.getEdges()) {
graph.removeEdge(edge);
}
for (Vertex vertex : graph.getVertices()) {
graph.removeVertex(vertex);
}
graph.shutdown();
}
private static void cleanupConfigurationStore(ConfigurationStore store) throws Exception {
for (EntityType type : EntityType.values()) {
Collection<String> entities = store.getEntities(type);
for (String entity : entities) {
store.remove(type, entity);
}
}
}
}