/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.falcon.regression.triage;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.Frequency;
import org.apache.falcon.entity.v0.SchemaHelper;
import org.apache.falcon.regression.Entities.ProcessMerlin;
import org.apache.falcon.regression.core.bundle.Bundle;
import org.apache.falcon.regression.core.enumsAndConstants.ResponseErrors;
import org.apache.falcon.regression.core.helpers.ColoHelper;
import org.apache.falcon.regression.core.util.InstanceUtil;
import org.apache.falcon.regression.core.util.BundleUtil;
import org.apache.falcon.regression.core.util.TimeUtil;
import org.apache.falcon.regression.core.util.OSUtil;
import org.apache.falcon.regression.core.util.OozieUtil;
import org.apache.falcon.regression.core.util.AssertUtil;
import org.apache.falcon.regression.core.util.EntityLineageUtil;
import org.apache.falcon.regression.testHelper.BaseTestClass;
import org.apache.falcon.resource.LineageGraphResult.Edge;
import org.apache.falcon.resource.SchedulableEntityInstance;
import org.apache.falcon.resource.TriageResult;
import org.apache.oozie.client.CoordinatorAction;
import org.apache.oozie.client.OozieClient;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* Test Class for Testing the Triage API on a single colo corresponding to FALCON-1377.
*/
@Test(groups = { "distributed", "embedded", "sanity" })
public class TriageAPISingleColoTest extends BaseTestClass {

    /** Tag expected on an input feed instance vertex that the triage reports as missing. */
    private static final String INPUT_MISSING_TAG = "Input[MISSING]";
    /** Tag expected on the (terminal) feed instance vertex the triage was requested for. */
    private static final String MISSING_TAG = "[MISSING]";
    /** Tag expected on a waiting process instance when it is the instance being triaged. */
    private static final String WAITING_TAG = "[WAITING]";
    /** Tag expected on a waiting process instance that is reached through its output feed. */
    private static final String OUTPUT_WAITING_TAG = "Output[WAITING]";
    /** Tag expected on a process instance that has succeeded. */
    private static final String SUCCEEDED_TAG = "[SUCCEEDED]";
    /** Label of an edge going from a feed instance to the process instance consuming it. */
    private static final String CONSUMED_BY = "consumed by";
    /** Label of an edge going from a process instance to the feed instance it produces. */
    private static final String PRODUCES = "produces";

    private final ColoHelper cluster = servers.get(0);
    private final OozieClient clusterOC = serverOC.get(0);
    private final String baseTestHDFSDir = cleanAndGetTestDir();
    private final String aggregateWorkflowDir = baseTestHDFSDir + "/aggregator";
    private final String feedInputPath = baseTestHDFSDir + "/input" + MINUTE_DATE_PATTERN;
    private final String feedOutputPath = baseTestHDFSDir + "/output-data" + MINUTE_DATE_PATTERN;
    private String outputFeedName;
    private String processName;
    private String clusterName;
    private String startTime;
    private String endTime;
    /** Expected vertices of the triage graph; reset before every test method. */
    private List<String> expectedVertexList = new ArrayList<>();
    /** Expected edges of the triage graph; reset before every test method. */
    private List<Edge> expectedEdgeList = new ArrayList<>();
    /** Most recent triage response received from the server or prism. */
    private TriageResult responseTriage;

    /**
     * Prepares a fresh bundle (process running every 5 minutes, concurrency 1, validity of
     * three minutes), uploads the workflow and resets the expected vertex/edge lists.
     * Nothing is submitted or scheduled here; each test does that itself.
     *
     * @throws Exception if the bundle cannot be prepared or the workflow cannot be uploaded
     */
    @BeforeMethod(alwaysRun = true)
    public void setUp() throws Exception {
        startTime = "2015-01-02T00:00Z";
        endTime = "2015-01-02T00:03Z";
        bundles[0] = BundleUtil.readELBundle();
        bundles[0].generateUniqueBundle(this);
        bundles[0] = new Bundle(bundles[0], cluster);
        bundles[0].setProcessWorkflow(aggregateWorkflowDir);
        bundles[0].setProcessPeriodicity(5, Frequency.TimeUnit.minutes);
        bundles[0].setProcessValidity(startTime, endTime);
        bundles[0].setProcessConcurrency(1);
        bundles[0].setInputFeedDataPath(feedInputPath);
        bundles[0].setOutputFeedLocationData(feedOutputPath);
        processName = bundles[0].getProcessName();
        clusterName = bundles[0].getClusterNames().get(0);
        outputFeedName = bundles[0].getOutputFeedNameFromBundle();
        uploadDirToClusters(aggregateWorkflowDir, OSUtil.RESOURCES_OOZIE);
        expectedEdgeList = new ArrayList<>();
        expectedVertexList = new ArrayList<>();
    }

    /**
     * Removes the entities created by the test and cleans the test directories.
     *
     * @throws IOException if the cleanup fails
     */
    @AfterMethod(alwaysRun = true)
    public void tearDown() throws IOException {
        removeTestClassEntities();
        cleanTestsDirs();
    }

    /**
     * Checks that the current triage response succeeded with exactly one graph, builds the graph
     * expected for the waiting instance at {@code startTime} and validates the response against it.
     * When triaging on the feed, an additional output feed vertex and a "produces" edge are expected.
     *
     * @param entityType type of entity the triage was requested on, whether process or feed
     * @throws Exception if building the expected graph or the validation fails
     */
    private void createExpectedOutput(EntityType entityType) throws Exception {
        assertSingleTriageGraph();
        String inputFeedName = bundles[0].getInputFeedNameFromBundle();
        String inputVertex1 = addVertex(inputFeedName, TimeUtil.addMinsToTime(startTime, -20),
            EntityType.FEED, INPUT_MISSING_TAG);
        String inputVertex2 = addVertex(inputFeedName, startTime, EntityType.FEED, INPUT_MISSING_TAG);
        // The process instance is terminal only when the triage was requested on the process itself.
        String processTag = entityType == EntityType.PROCESS ? WAITING_TAG : OUTPUT_WAITING_TAG;
        String processVertex = addVertex(processName, startTime, EntityType.PROCESS, processTag);
        if (entityType == EntityType.FEED) {
            String outputVertex = addVertex(outputFeedName, startTime, EntityType.FEED, MISSING_TAG);
            addEdge(processVertex, outputVertex, PRODUCES);
        }
        addEdge(inputVertex1, processVertex, CONSUMED_BY);
        addEdge(inputVertex2, processVertex, CONSUMED_BY);
        validateTriageGraph();
    }

    /**
     * Single process with one input and one output, of which one instance is in waiting, and request for triage
     * on that instance on the server. There should be no output feed and the process instance is the terminal
     * instance. Upon triaging on server on an output instance of the feed, an additional vertex and edge should
     * be seen for this feed instance.
     *
     * @param entityType entity type (feed or process) to triage on
     * @throws Exception if scheduling, the triage request or the validation fails
     */
    @Test(dataProvider = "getParameters", groups = "embedded")
    public void triageTestServer(EntityType entityType) throws Exception {
        verifyWaitingInstanceTriage(cluster, entityType);
    }

    /**
     * Single process with one input and one output, of which one instance is in waiting, and request for triage
     * on that instance on the server. There should be no output feed and the process instance is the terminal
     * instance. Upon triaging on prism on an output instance of the feed, an additional vertex and edge should
     * be seen for this feed instance.
     *
     * @param entityType entity type (feed or process) to triage on
     * @throws Exception if scheduling, the triage request or the validation fails
     */
    @Test(dataProvider = "getParameters", groups = "distributed")
    public void triageTestPrism(EntityType entityType) throws Exception {
        verifyWaitingInstanceTriage(prism, entityType);
    }

    /**
     * Single process with one input and one output, but we triage on a non-existent feed/process
     * instance on the server. Appropriate error should be thrown.
     *
     * @param entityType entity type (feed or process) to triage on
     * @throws Exception if scheduling, the triage request or the validation fails
     */
    @Test(dataProvider = "getParameters", groups = "embedded")
    public void invalidInstanceOnServerTest(EntityType entityType) throws Exception {
        verifyInvalidInstanceTriage(cluster, entityType);
    }

    /**
     * Single process with one input and one output, but we triage on a non-existent feed/process
     * instance on the prism. Appropriate error should be thrown.
     *
     * @param entityType entity type (feed or process) to triage on
     * @throws Exception if scheduling, the triage request or the validation fails
     */
    @Test(dataProvider = "getParameters", groups = "distributed")
    public void invalidInstanceOnPrismTest(EntityType entityType) throws Exception {
        verifyInvalidInstanceTriage(prism, entityType);
    }

    /**
     * Triage on a process instance on the second configured server without submitting or scheduling
     * anything in this test, so the process does not exist there. Appropriate error should be thrown.
     * NOTE(review): relies on a second entry in {@code servers} - confirm the target setup provides one.
     *
     * @throws Exception if the triage request or the validation fails
     */
    @Test(groups = "embedded")
    public void processTriageOnServerWhereProcessDoesNotExistTest() throws Exception {
        responseTriage = requestTriage(servers.get(1), EntityType.PROCESS, startTime);
        EntityLineageUtil.validateError(responseTriage, ResponseErrors.PROCESS_NOT_FOUND);
    }

    /**
     * Single process with one input and one output which succeeds. Triage on server, on a succeeded instance,
     * and we should get just one vertex in the graph, without any edges.
     *
     * @throws Exception if scheduling, the triage request or the validation fails
     */
    @Test(groups = "embedded")
    public void processInstanceSucceededTriageOnServerTest() throws Exception {
        verifySucceededInstanceTriage(cluster);
    }

    /**
     * Single process with one input and one output which succeeds. Triage on prism, on a succeeded instance,
     * and we should get just one vertex in the graph, without any edges.
     *
     * @throws Exception if scheduling, the triage request or the validation fails
     */
    @Test(groups = "distributed")
    public void processInstanceSucceededTriageOnPrismTest() throws Exception {
        verifySucceededInstanceTriage(prism);
    }

    /**
     * Single process one instance of whose output feed is fed as input to the process. This is to test the closed
     * loop condition, in case we triage on a process instance, or on an output feed instance.
     *
     * @throws Exception if scheduling, a triage request or the validation fails
     */
    @Test(groups = "distributed")
    public void cycleTest() throws Exception {
        //Setting an instance of the output feed of a process as input to the process
        bundles[0].addProcessInput("inputData2", outputFeedName);
        ProcessMerlin processObj = new ProcessMerlin(bundles[0].getProcessData());
        processObj.getInputs().getInputs().get(1).setStart("now(-1,0)");
        processObj.getInputs().getInputs().get(1).setEnd("now(-1,0)");
        bundles[0].setProcessData(processObj.toString());
        bundles[0].submitFeedsScheduleProcess();

        //Triage on the output feed instance:
        //there'll be four feed instance vertices and one process instance vertex
        responseTriage = requestTriage(prism, EntityType.FEED, startTime);
        assertSingleTriageGraph();
        String processVertex = addCycleInputsAndProcess(OUTPUT_WAITING_TAG);
        String outputVertex = addVertex(outputFeedName, startTime, EntityType.FEED, MISSING_TAG);
        addEdge(processVertex, outputVertex, PRODUCES);
        validateTriageGraph();

        //Triage on the process instance:
        //there'll be three feed instance vertices and one process instance vertex
        responseTriage = requestTriage(prism, EntityType.PROCESS, startTime);
        assertSingleTriageGraph();
        expectedVertexList = new ArrayList<>();
        expectedEdgeList = new ArrayList<>();
        addCycleInputsAndProcess(WAITING_TAG);
        validateTriageGraph();
    }

    /**
     * Two Dependent processes, where one consumes the output of the other. Triage on the output of the
     * second process.
     *
     * @throws Exception if scheduling, the triage request or the validation fails
     */
    @Test(groups = "distributed")
    public void twoDependentProcessesTest() throws Exception {
        bundles[0].submitFeedsScheduleProcess(); //this process will stay in waiting

        //First process: two input feed instance vertices, the process instance vertex and its
        //output feed instance vertex. These must be built before the bundle is rewired below.
        String firstInputFeedName = bundles[0].getInputFeedNameFromBundle();
        String inputVertex1 = addVertex(firstInputFeedName, TimeUtil.addMinsToTime(startTime, -20),
            EntityType.FEED, INPUT_MISSING_TAG);
        String inputVertex2 = addVertex(firstInputFeedName, startTime, EntityType.FEED, INPUT_MISSING_TAG);
        String processVertex1 = addVertex(processName, startTime, EntityType.PROCESS, OUTPUT_WAITING_TAG);
        String outputVertex1 = addVertex(outputFeedName, startTime, EntityType.FEED, INPUT_MISSING_TAG);

        //preparing second process
        String secondProcessName = "ConsumerOfFirstProcessOutput";
        bundles[0].setProcessValidity(TimeUtil.addMinsToTime(startTime, 60), TimeUtil.addMinsToTime(startTime, 61));
        ProcessMerlin processObj = new ProcessMerlin(bundles[0].getProcessData());
        processObj.getOutputs().getOutputs().get(0).setFeed(processObj.getInputs().getInputs().get(0).getFeed());
        //The input of this process is the output of the previous process
        processObj.getInputs().getInputs().get(0).setFeed(outputFeedName);
        processObj.setName(secondProcessName);
        bundles[0].setProcessData(processObj.toString());
        bundles[0].setProcessInputStartEnd("now(-1,0)", "now(-1,0)");
        bundles[0].setProcessWorkflow(aggregateWorkflowDir);
        bundles[0].setProcessPeriodicity(5, Frequency.TimeUnit.minutes);
        bundles[0].submitFeedsScheduleProcess();

        //The bundle's output feed is now the output of the second process
        String secondInstanceTime = TimeUtil.addMinsToTime(startTime, 60);
        responseTriage = prism.getFeedHelper().getInstanceTriage(bundles[0].getOutputFeedNameFromBundle(),
            "?start=" + secondInstanceTime);
        assertSingleTriageGraph();

        //Adding one more process vertex and output feed vertex
        String processVertex2 = addVertex(secondProcessName, TimeUtil.addMinsToTime(startTime, 60),
            EntityType.PROCESS, OUTPUT_WAITING_TAG);
        String outputVertex2 = addVertex(bundles[0].getOutputFeedNameFromBundle(),
            TimeUtil.addMinsToTime(startTime, 60), EntityType.FEED, MISSING_TAG);
        addEdge(inputVertex1, processVertex1, CONSUMED_BY);
        addEdge(inputVertex2, processVertex1, CONSUMED_BY);
        addEdge(processVertex1, outputVertex1, PRODUCES);
        addEdge(outputVertex1, processVertex2, CONSUMED_BY);
        addEdge(processVertex2, outputVertex2, PRODUCES);
        validateTriageGraph();
    }

    /**
     * Data Provider enables the same test to run for triage on entities feed and process.
     *
     * @return one parameter row per entity type
     */
    @DataProvider
    private Object[][] getParameters() {
        return new Object[][]{{EntityType.FEED}, {EntityType.PROCESS}};
    }

    /**
     * Creates a vertex out of the fields provided.
     *
     * @param name name of process/feed
     * @param instanceTime instance time
     * @param entityType if entity is process or feed
     * @param tags status of the feed
     * @return string form of the instance, as used in the expected vertex and edge lists
     */
    private String createVertex(String name, String instanceTime, EntityType entityType, String tags) {
        SchedulableEntityInstance vertex = new SchedulableEntityInstance(name, clusterName,
            SchemaHelper.parseDateUTC(instanceTime), entityType);
        vertex.setTags(tags);
        return vertex.toString();
    }

    /**
     * Creates a vertex and appends it to the expected vertex list.
     *
     * @param name name of process/feed
     * @param instanceTime instance time
     * @param entityType if entity is process or feed
     * @param tags tags expected on the vertex
     * @return the created vertex, for use as an edge end point
     */
    private String addVertex(String name, String instanceTime, EntityType entityType, String tags) {
        String vertex = createVertex(name, instanceTime, entityType, tags);
        expectedVertexList.add(vertex);
        return vertex;
    }

    /**
     * Appends an edge to the expected edge list.
     *
     * @param from source vertex
     * @param to target vertex
     * @param label edge label
     */
    private void addEdge(String from, String to, String label) {
        expectedEdgeList.add(new Edge(from, to, label));
    }

    /**
     * Adds the three input feed instance vertices and the process instance vertex expected by
     * {@link #cycleTest()}, together with the edges from every input to the process.
     *
     * @param processTag tag expected on the process instance vertex
     * @return the process instance vertex
     * @throws Exception if an instance time cannot be computed
     */
    private String addCycleInputsAndProcess(String processTag) throws Exception {
        String inputFeedName = bundles[0].getInputFeedNameFromBundle();
        String inputVertex1 = addVertex(inputFeedName, TimeUtil.addMinsToTime(startTime, -20),
            EntityType.FEED, INPUT_MISSING_TAG);
        String inputVertex2 = addVertex(inputFeedName, startTime, EntityType.FEED, INPUT_MISSING_TAG);
        String inputVertex3 = addVertex(outputFeedName, TimeUtil.addMinsToTime(startTime, -60),
            EntityType.FEED, INPUT_MISSING_TAG);
        String processVertex = addVertex(processName, startTime, EntityType.PROCESS, processTag);
        addEdge(inputVertex1, processVertex, CONSUMED_BY);
        addEdge(inputVertex2, processVertex, CONSUMED_BY);
        addEdge(inputVertex3, processVertex, CONSUMED_BY);
        return processVertex;
    }

    /**
     * Submits the feeds, schedules the process and waits till its instances are created.
     *
     * @throws Exception if submission, scheduling or the wait fails
     */
    private void scheduleProcessAndAwaitInstances() throws Exception {
        bundles[0].submitFeedsScheduleProcess();
        InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
    }

    /**
     * Requests triage for the bundle's output feed (for {@link EntityType#FEED}) or for its
     * process (any other type) at the given instance time.
     *
     * @param colo server or prism to send the request to
     * @param entityType entity type to triage on
     * @param instanceTime instance time passed as the {@code start} parameter
     * @return the triage response
     * @throws Exception if the request fails
     */
    private TriageResult requestTriage(ColoHelper colo, EntityType entityType, String instanceTime)
        throws Exception {
        String params = "?start=" + instanceTime;
        if (entityType == EntityType.FEED) {
            return colo.getFeedHelper().getInstanceTriage(outputFeedName, params);
        }
        return colo.getProcessHelper().getInstanceTriage(processName, params);
    }

    /**
     * Asserts that the current triage response succeeded and contains exactly one graph.
     */
    private void assertSingleTriageGraph() {
        AssertUtil.assertSucceeded(responseTriage);
        Assert.assertEquals(responseTriage.getTriageGraphs().length, 1);
    }

    /**
     * Validates the first graph of the current triage response against the expected vertex
     * and edge lists.
     */
    private void validateTriageGraph() {
        EntityLineageUtil.validateLineageGraphResult(responseTriage.getTriageGraphs()[0],
            expectedVertexList.toArray(new String[0]),
            expectedEdgeList.toArray(new Edge[0]));
    }

    /**
     * Schedules the process, triages on its waiting instance at {@code startTime} through the given
     * colo and validates the resulting graph.
     *
     * @param colo server or prism to send the triage request to
     * @param entityType entity type (feed or process) to triage on
     * @throws Exception if scheduling, the triage request or the validation fails
     */
    private void verifyWaitingInstanceTriage(ColoHelper colo, EntityType entityType) throws Exception {
        scheduleProcessAndAwaitInstances();
        responseTriage = requestTriage(colo, entityType, startTime);
        //Creating expected vertices and graphs
        createExpectedOutput(entityType);
    }

    /**
     * Schedules the process and triages through the given colo on an instance time two minutes past
     * {@code startTime}, which does not match any instance of the 5-minute process; expects
     * {@link ResponseErrors#INVALID_INSTANCE_TIME}.
     *
     * @param colo server or prism to send the triage request to
     * @param entityType entity type (feed or process) to triage on
     * @throws Exception if scheduling, the triage request or the validation fails
     */
    private void verifyInvalidInstanceTriage(ColoHelper colo, EntityType entityType) throws Exception {
        scheduleProcessAndAwaitInstances();
        responseTriage = requestTriage(colo, entityType, TimeUtil.addMinsToTime(startTime, 2));
        EntityLineageUtil.validateError(responseTriage, ResponseErrors.INVALID_INSTANCE_TIME);
    }

    /**
     * Schedules the process, creates its missing dependencies, waits for one instance to succeed and
     * then triages on the process instance at {@code startTime} through the given colo. The graph
     * must hold a single succeeded process instance vertex and no edges.
     *
     * @param colo server or prism to send the triage request to
     * @throws Exception if scheduling, the waits, the triage request or the validation fails
     */
    private void verifySucceededInstanceTriage(ColoHelper colo) throws Exception {
        scheduleProcessAndAwaitInstances();
        OozieUtil.createMissingDependencies(cluster, EntityType.PROCESS, processName, 0);
        InstanceUtil.waitTillInstanceReachState(clusterOC, processName, 1,
            CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS, 5);
        responseTriage = requestTriage(colo, EntityType.PROCESS, startTime);
        assertSingleTriageGraph();
        //There'll be just one process instance vertex and no edges
        Assert.assertEquals(responseTriage.getTriageGraphs()[0].getVertices().length, 1);
        addVertex(processName, startTime, EntityType.PROCESS, SUCCEEDED_TAG);
        validateTriageGraph();
    }
}