/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.regression.triage;

import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.Frequency;
import org.apache.falcon.regression.Entities.FeedMerlin;
import org.apache.falcon.regression.Entities.ProcessMerlin;
import org.apache.falcon.regression.core.bundle.Bundle;
import org.apache.falcon.regression.core.helpers.ColoHelper;
import org.apache.falcon.regression.core.helpers.entity.AbstractEntityHelper;
import org.apache.falcon.regression.core.util.AssertUtil;
import org.apache.falcon.regression.core.util.BundleUtil;
import org.apache.falcon.regression.core.util.EntityLineageUtil;
import org.apache.falcon.regression.core.util.EntityLineageUtil.PipelineEntityType;
import org.apache.falcon.regression.core.util.HadoopUtil;
import org.apache.falcon.regression.core.util.InstanceUtil;
import org.apache.falcon.regression.core.util.OSUtil;
import org.apache.falcon.regression.core.util.TimeUtil;
import org.apache.falcon.regression.testHelper.BaseTestClass;
import org.apache.falcon.resource.APIResult;
import org.apache.falcon.resource.InstanceDependencyResult;
import org.apache.falcon.resource.LineageGraphResult;
import org.apache.falcon.resource.LineageGraphResult.Edge;
import org.apache.falcon.resource.SchedulableEntityInstance;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.log4j.Logger;
import org.apache.oozie.client.CoordinatorAction;
import org.apache.oozie.client.OozieClient;
import org.joda.time.DateTime;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Test for instance dependency endpoint.
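 * <p>
 * {@code setup()} builds a three-stage pipeline (three processes chained through
 * intermediate feeds); the tests then verify that the instance dependency and instance
 * triage APIs report the expected upstream and downstream instances for each entity.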
 */
@Test(groups = { "distributed", "embedded", "sanity" })
public class PipelineInstanceDependencyTest extends BaseTestClass {
    private ColoHelper cluster = servers.get(0);
    private FileSystem clusterFS = serverFS.get(0);
    private OozieClient clusterOC = serverOC.get(0);
    private String baseTestHDFSDir = cleanAndGetTestDir();
    private String aggregateWorkflowDir = baseTestHDFSDir + "/aggregator";
    private String feedInputPath = baseTestHDFSDir + "/input" + MINUTE_DATE_PATTERN;
    private String feedOutputPath = baseTestHDFSDir + "/output-data" + MINUTE_DATE_PATTERN;
    private final String startTimeStr = "2010-01-02T01:00Z";
    private final DateTime startTime = TimeUtil.oozieDateToDate(startTimeStr);
    private final String endTimeStr = "2010-01-02T01:11Z";
    private List<String> inputFeedNames, outputFeedNames, processNames;
    private List<Integer> inputFeedFrequencies;
    private static final Logger LOGGER = Logger.getLogger(PipelineInstanceDependencyTest.class);
    private String clusterName;

    // Orders dependency instances by tag, cluster, entity type and instance time so that
    // actual and expected lists can be compared element-by-element after sorting.
    private static final Comparator<SchedulableEntityInstance> DEPENDENCY_COMPARATOR =
        new Comparator<SchedulableEntityInstance>() {
            @Override
            public int compare(SchedulableEntityInstance o1, SchedulableEntityInstance o2) {
                int tagDiff = o1.getTags().compareTo(o2.getTags());
                if (tagDiff != 0) {
                    return tagDiff;
                }
                int clusterDiff = o1.getCluster().compareTo(o2.getCluster());
                if (clusterDiff != 0) {
                    return clusterDiff;
                }
                int typeDiff = o1.getEntityType().compareTo(o2.getEntityType());
                if (typeDiff != 0) {
                    return typeDiff;
                }
                int dateDiff = o1.getInstanceTime().compareTo(o2.getInstanceTime());
                if (dateDiff != 0) {
                    return dateDiff;
                }
                return 0;
            }
        };

    private final Comparator<Edge> edgeComparator = new Comparator<Edge>() {
        @Override
        public int compare(Edge o1, Edge o2) {
            return o1.toString().compareTo(o2.toString());
        }
    };

    @BeforeClass(alwaysRun = true)
    public void createTestData() throws Exception {
        LOGGER.info("in @BeforeClass");
        HadoopUtil.uploadDir(clusterFS, aggregateWorkflowDir, OSUtil.RESOURCES_OOZIE);
    }

    /**
     * The scenario that we will set up looks like:<br>
     * inputFeed1 -> process1 -> outputFeed1 -> process2 -> outputFeed2 -> process3 -> outputFeed3.
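     * <p>
     * Expected frequencies (captured in {@code inputFeedFrequencies}): inputFeed1 every
     * 20 minutes; all other feeds and all three processes every 5 minutes, within the
     * validity window [2010-01-02T01:00Z, 2010-01-02T01:11Z].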
     * @throws Exception on setup failure
     */
    @BeforeMethod(alwaysRun = true)
    public void setup() throws Exception {
        bundles[0] = BundleUtil.readELBundle();
        bundles[0] = new Bundle(bundles[0], cluster);
        bundles[0].generateUniqueBundle(this);
        clusterName = bundles[0].getClusterNames().get(0);
        bundles[0].setProcessWorkflow(aggregateWorkflowDir);
        bundles[0].setProcessValidity(startTimeStr, endTimeStr);
        bundles[0].setProcessPeriodicity(5, Frequency.TimeUnit.minutes);
        bundles[0].setInputFeedDataPath(feedInputPath);
        bundles[0].setOutputFeedPeriodicity(5, Frequency.TimeUnit.minutes);
        bundles[0].setOutputFeedLocationData(feedOutputPath);
        bundles[0].submitFeedsScheduleProcess(prism);
        final String oldInputFeedName = bundles[0].getInputFeedNameFromBundle();
        final String oldOutputFeedName = bundles[0].getOutputFeedNameFromBundle();
        final String oldProcessName = bundles[0].getProcessName();
        inputFeedFrequencies = Arrays.asList(20, 5, 5);
        inputFeedNames =
            Arrays.asList(oldInputFeedName, oldOutputFeedName, oldOutputFeedName + "-2");
        outputFeedNames =
            Arrays.asList(oldOutputFeedName, oldOutputFeedName + "-2", oldOutputFeedName + "-3");
        processNames = Arrays.asList(oldProcessName, oldProcessName + "-2", oldProcessName + "-3");
        List<String> feedOutputPaths = Arrays.asList(
            feedOutputPath,
            baseTestHDFSDir + "/output-data-2" + MINUTE_DATE_PATTERN,
            baseTestHDFSDir + "/output-data-3" + MINUTE_DATE_PATTERN
        );
        // Create the second and third processes, each consuming the output of the previous one.
        for (int bIndex = 1; bIndex < 3; ++bIndex) {
            final FeedMerlin outputFeed = new FeedMerlin(bundles[0].getOutputFeedFromBundle());
            final ProcessMerlin processMerlin = bundles[0].getProcessObject();
            processMerlin.setName(processNames.get(bIndex));
            outputFeed.setDataLocationPath(feedOutputPaths.get(bIndex));
            outputFeed.setName(outputFeedNames.get(bIndex));
            // Rename output feeds before renaming input feeds.
            processMerlin.renameFeeds(
                Collections.singletonMap(oldOutputFeedName, outputFeedNames.get(bIndex)));
            processMerlin.renameFeeds(
                Collections.singletonMap(oldInputFeedName, inputFeedNames.get(bIndex)));
            AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(outputFeed.toString()));
            AssertUtil.assertSucceeded(
                prism.getProcessHelper().submitAndSchedule(processMerlin.toString()));
        }
        for (int index = 0; index < 3; ++index) {
            InstanceUtil.waitTillInstanceReachState(clusterOC, processNames.get(index), 3,
                CoordinatorAction.Status.WAITING, EntityType.PROCESS, 5);
        }
        LOGGER.info(inputFeedNames.get(0) + "(" + inputFeedFrequencies.get(0) + ") -> *"
            + processNames.get(0) + "* -> " + inputFeedNames.get(1) + "("
            + inputFeedFrequencies.get(1) + ") -> *" + processNames.get(1) + "* -> "
            + inputFeedNames.get(2) + "(" + inputFeedFrequencies.get(2) + ") -> *"
            + processNames.get(2) + "* -> " + outputFeedNames.get(2));
    }

    @AfterMethod(alwaysRun = true)
    public void tearDown() {
        removeTestClassEntities();
    }

    @Test
    public void processInstanceDependencyTest() throws Exception {
        final DateTime startTimeMinus20 = startTime.minusMinutes(20);
        for (int index = 0; index < 3; ++index) {
            List<SchedulableEntityInstance> expectedDependencies = new ArrayList<>();
            final SchedulableEntityInstance outputInstance = new SchedulableEntityInstance(
                outputFeedNames.get(index), clusterName, startTime.toDate(), EntityType.FEED);
            outputInstance.setTags("Output");
            expectedDependencies.add(outputInstance);
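            // Input dependencies: feed instances from the process instance time back through
            // the previous 20 minutes, stepping backwards by the input feed's frequency
            // (2 instances for the 20-minute feed, 5 for each 5-minute feed).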
            for (DateTime dt = new DateTime(startTime); !dt.isBefore(startTimeMinus20);
                 dt = dt.minusMinutes(inputFeedFrequencies.get(index))) {
                final SchedulableEntityInstance inputInstance = new SchedulableEntityInstance(
                    inputFeedNames.get(index), clusterName, dt.toDate(), EntityType.FEED);
                inputInstance.setTags("Input");
                expectedDependencies.add(inputInstance);
            }
            InstanceDependencyResult r = prism.getProcessHelper().getInstanceDependencies(
                processNames.get(index), "?instanceTime=" + startTimeStr, null);
            List<SchedulableEntityInstance> actualDependencies =
                Arrays.asList(r.getDependencies());
            Collections.sort(expectedDependencies, DEPENDENCY_COMPARATOR);
            Collections.sort(actualDependencies, DEPENDENCY_COMPARATOR);
            Assert.assertEquals(actualDependencies, expectedDependencies,
                "Unexpected dependencies for process: " + processNames.get(index));
        }
    }

    @Test
    public void inputFeedInstanceDependencyTest() throws Exception {
        final String inputFeedToTest = inputFeedNames.get(1);
        final DateTime endTime = TimeUtil.oozieDateToDate(endTimeStr);
        List<SchedulableEntityInstance> expectedDependencies = new ArrayList<>();
        // The feed instance is produced ("Output" tag) by process1 and consumed ("Input" tag)
        // by every process2 instance in the validity window.
        final SchedulableEntityInstance outputInstance = new SchedulableEntityInstance(
            processNames.get(0), clusterName, startTime.toDate(), EntityType.PROCESS);
        outputInstance.setTags("Output");
        expectedDependencies.add(outputInstance);
        final int processFrequency = 5;
        for (DateTime dt = new DateTime(startTime); !dt.isAfter(endTime);
             dt = dt.plusMinutes(processFrequency)) {
            final SchedulableEntityInstance inputInstance = new SchedulableEntityInstance(
                processNames.get(1), clusterName, dt.toDate(), EntityType.PROCESS);
            inputInstance.setTags("Input");
            expectedDependencies.add(inputInstance);
        }
        InstanceDependencyResult r = prism.getFeedHelper().getInstanceDependencies(
            inputFeedToTest, "?instanceTime=" + startTimeStr, null);
        List<SchedulableEntityInstance> actualDependencies = Arrays.asList(r.getDependencies());
        Collections.sort(expectedDependencies, DEPENDENCY_COMPARATOR);
        Collections.sort(actualDependencies, DEPENDENCY_COMPARATOR);
        Assert.assertEquals(actualDependencies, expectedDependencies,
            "Unexpected dependencies for feed: " + inputFeedToTest);
    }

    @Test
    public void outputFeedInstanceDependencyTest() throws Exception {
        final String outputFeedToTest = outputFeedNames.get(1);
        final DateTime endTime = TimeUtil.oozieDateToDate(endTimeStr);
        List<SchedulableEntityInstance> expectedDependencies = new ArrayList<>();
        final SchedulableEntityInstance outputInstance = new SchedulableEntityInstance(
            processNames.get(1), clusterName, startTime.toDate(), EntityType.PROCESS);
        outputInstance.setTags("Output");
        expectedDependencies.add(outputInstance);
        final int processFrequency = 5;
        for (DateTime dt = new DateTime(startTime); !dt.isAfter(endTime);
             dt = dt.plusMinutes(processFrequency)) {
            final SchedulableEntityInstance inputInstance = new SchedulableEntityInstance(
                processNames.get(2), clusterName, dt.toDate(), EntityType.PROCESS);
            inputInstance.setTags("Input");
            expectedDependencies.add(inputInstance);
        }
        InstanceDependencyResult r = prism.getFeedHelper().getInstanceDependencies(
            outputFeedToTest, "?instanceTime=" + startTimeStr, null);
        List<SchedulableEntityInstance> actualDependencies = Arrays.asList(r.getDependencies());
        Collections.sort(expectedDependencies, DEPENDENCY_COMPARATOR);
        Collections.sort(actualDependencies, DEPENDENCY_COMPARATOR);
        Assert.assertEquals(actualDependencies, expectedDependencies,
            "Unexpected dependencies for feed: " + outputFeedToTest);
    }

    /**
     * Regression check for https://issues.apache.org/jira/browse/FALCON-1317.
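     * The terminal output feed is consumed by no process, so its only dependency is the
     * instance of the process that produces it; the API must return that single element.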
     */
    @Test
    public void testInstanceDependencySingleElement()
        throws URISyntaxException, AuthenticationException, InterruptedException, IOException {
        InstanceDependencyResult r = prism.getFeedHelper().getInstanceDependencies(
            outputFeedNames.get(2), "?instanceTime=" + startTimeStr, null);
        Assert.assertEquals(r.getStatus(), APIResult.Status.SUCCEEDED, "Request shouldn't fail.");
        List<SchedulableEntityInstance> actualDependencies = Arrays.asList(r.getDependencies());
        Assert.assertEquals(actualDependencies.size(), 1,
            "There should be a single dependency element.");
    }

    /**
     * Run triage for different pipeline feeds and processes.
     * @param bundleInd pipeline bundle index
     * @param entityType process or feed
     */
    @Test(dataProvider = "getParameters")
    public void testTriageInstance(int bundleInd, EntityType entityType)
        throws URISyntaxException, AuthenticationException, InterruptedException, IOException {
        AbstractEntityHelper helper;
        String entityName;
        if (entityType == EntityType.FEED) {
            helper = prism.getFeedHelper();
            entityName = outputFeedNames.get(bundleInd);
        } else {
            helper = prism.getProcessHelper();
            entityName = processNames.get(bundleInd);
        }
        Map<PipelineEntityType, List<String>> entitiesNames = new HashMap<>();
        entitiesNames.put(PipelineEntityType.PROCESS, processNames);
        entitiesNames.put(PipelineEntityType.INPUT_FEED, inputFeedNames);
        entitiesNames.put(PipelineEntityType.OUTPUT_FEED, outputFeedNames);
        LineageGraphResult expected = EntityLineageUtil.getExpectedResult(bundleInd,
            entitiesNames, inputFeedFrequencies, entityName, clusterName, startTimeStr);
        LineageGraphResult actual =
            helper.getInstanceTriage(entityName, "?start=" + startTimeStr).getTriageGraphs()[0];
        final List<String> expectedVertices =
            new ArrayList<>(Arrays.asList(expected.getVertices()));
        final List<Edge> expectedEdges = new ArrayList<>(Arrays.asList(expected.getEdges()));
        final List<String> actualVertices = Arrays.asList(actual.getVertices());
        final List<Edge> actualEdges = Arrays.asList(actual.getEdges());
        Collections.sort(actualVertices);
        Collections.sort(expectedVertices);
        Collections.sort(actualEdges, edgeComparator);
        Collections.sort(expectedEdges, edgeComparator);
        Assert.assertEquals(actualVertices, expectedVertices,
            "Actual vertices & expected vertices in triage graph don't match");
        Assert.assertEquals(actualEdges, expectedEdges,
            "Actual edges & expected edges in triage graph don't match");
    }

    @DataProvider
    public Object[][] getParameters() {
        return new Object[][]{
            {0, EntityType.FEED},
            {0, EntityType.PROCESS},
            {1, EntityType.FEED},
            {1, EntityType.PROCESS},
            {2, EntityType.FEED},
            {2, EntityType.PROCESS},
        };
    }
}