/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.regression;

import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.Frequency;
import org.apache.falcon.regression.core.bundle.Bundle;
import org.apache.falcon.regression.core.helpers.ColoHelper;
import org.apache.falcon.regression.core.response.ServiceResponse;
import org.apache.falcon.regression.core.util.BundleUtil;
import org.apache.falcon.regression.core.util.HadoopUtil;
import org.apache.falcon.regression.core.util.InstanceUtil;
import org.apache.falcon.regression.core.util.Util;
import org.apache.falcon.regression.core.util.OozieUtil;
import org.apache.falcon.regression.core.util.OSUtil;
import org.apache.falcon.regression.core.util.AssertUtil;
import org.apache.falcon.regression.testHelper.BaseTestClass;
import org.apache.falcon.resource.FeedInstanceResult;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Logger;
import org.apache.oozie.client.CoordinatorAction;
import org.apache.oozie.client.OozieClient;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Test for https://issues.apache.org/jira/browse/FALCON-761.
 */
@Test(groups = { "distributed", "embedded" }, timeOut = 900000)
public class FeedInstanceListingTest extends BaseTestClass {
    private String baseTestDir = cleanAndGetTestDir();
    private String aggregateWorkflowDir = baseTestDir + "/aggregator";
    private String feedInputPath = baseTestDir + "/input" + MINUTE_DATE_PATTERN;
    private String feedOutputPath = baseTestDir + "/output-data" + MINUTE_DATE_PATTERN;
    private String processName;

    private ColoHelper cluster = servers.get(0);
    private FileSystem clusterFS = serverFS.get(0);
    private OozieClient clusterOC = serverOC.get(0);

    private static final Logger LOGGER = Logger.getLogger(FeedInstanceListingTest.class);

    @BeforeMethod(alwaysRun = true)
    public void setup() throws Exception {
        bundles[0] = BundleUtil.readELBundle();
        bundles[0] = new Bundle(bundles[0], cluster);
        bundles[0].generateUniqueBundle(this);
        bundles[0].setProcessWorkflow(aggregateWorkflowDir);
        bundles[0].setInputFeedDataPath(feedInputPath);
        bundles[0].setInputFeedPeriodicity(5, Frequency.TimeUnit.minutes);
        bundles[0].setOutputFeedPeriodicity(5, Frequency.TimeUnit.minutes);
        bundles[0].setOutputFeedLocationData(feedOutputPath);
        bundles[0].setProcessPeriodicity(5, Frequency.TimeUnit.minutes);
        processName = bundles[0].getProcessName();
        HadoopUtil.uploadDir(clusterFS, aggregateWorkflowDir, OSUtil.RESOURCES_OOZIE);
    }

    @AfterMethod(alwaysRun = true)
    public void tearDown() throws IOException {
        cleanTestsDirs();
        removeTestClassEntities();
    }

    /**
     * Test when all data is available for all instances.
     */
    @Test
    public void testFeedListingWhenAllAvailable() throws Exception {
        bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z");
        bundles[0].setProcessConcurrency(1);
        bundles[0].submitFeedsScheduleProcess(prism);
        InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
        List<List<String>> missingDependencies =
            OozieUtil.createMissingDependencies(cluster, EntityType.PROCESS, processName, 0);
        List<String> missingDependencyLastInstance =
            missingDependencies.get(missingDependencies.size() - 1);
        HadoopUtil.flattenAndPutDataInFolder(clusterFS, OSUtil.SINGLE_FILE,
            missingDependencyLastInstance);
        InstanceUtil.waitTillInstanceReachState(clusterOC, processName, 1,
            CoordinatorAction.Status.RUNNING, EntityType.PROCESS, 5);
        FeedInstanceResult r = prism.getFeedHelper()
            .getFeedInstanceListing(Util.readEntityName(bundles[0].getDataSets().get(0)),
                "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
        validateResponse(r, 5, 0, 0, 0, 5);
    }

    /**
     * Test when only empty directories exist for all instances.
     */
    @Test
    public void testFeedListingWhenAllEmpty() throws Exception {
        bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z");
        bundles[0].setProcessConcurrency(1);
        bundles[0].submitFeedsScheduleProcess(prism);
        InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
        OozieUtil.createMissingDependencies(cluster, EntityType.PROCESS, processName, 0);
        InstanceUtil.waitTillInstanceReachState(clusterOC, processName, 1,
            CoordinatorAction.Status.RUNNING, EntityType.PROCESS, 5);
        FeedInstanceResult r = prism.getFeedHelper()
            .getFeedInstanceListing(Util.readEntityName(bundles[0].getDataSets().get(0)),
                "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
        validateResponse(r, 5, 0, 5, 0, 0);
    }

    /**
     * Test when no data is present for any instance.
     */
    @Test
    public void testFeedListingWhenAllMissing() throws Exception {
        bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z");
        bundles[0].setProcessConcurrency(1);
        bundles[0].submitFeedsScheduleProcess(prism);
        InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
        FeedInstanceResult r = prism.getFeedHelper()
            .getFeedInstanceListing(Util.readEntityName(bundles[0].getDataSets().get(0)),
                "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
        validateResponse(r, 5, 5, 0, 0, 0);
    }

    /**
     * Initially no availability flag is set for the feed and data is created, so the instance
     * status is available. Then the availability flag is set and the feed is updated. Since the
     * flag file is never created on HDFS, the instance status should change to partial.
     */
    @Test
    public void testFeedListingAfterFeedAvailabilityFlagUpdate() throws Exception {
        bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z");
        bundles[0].setProcessConcurrency(1);
        bundles[0].submitFeedsScheduleProcess(prism);
        InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
        List<List<String>> missingDependencies =
            OozieUtil.createMissingDependencies(cluster, EntityType.PROCESS, processName, 0);
        List<String> missingDependencyLastInstance =
            missingDependencies.get(missingDependencies.size() - 1);
        HadoopUtil.flattenAndPutDataInFolder(clusterFS, OSUtil.SINGLE_FILE,
            missingDependencyLastInstance);
        InstanceUtil.waitTillInstanceReachState(clusterOC, processName, 1,
            CoordinatorAction.Status.RUNNING, EntityType.PROCESS, 5);
        FeedInstanceResult r = prism.getFeedHelper()
            .getFeedInstanceListing(Util.readEntityName(bundles[0].getDataSets().get(0)),
                "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
        validateResponse(r, 5, 0, 0, 0, 5);
        String inputFeed = bundles[0].getInputFeedFromBundle();
        bundles[0].setInputFeedAvailabilityFlag("_SUCCESS");
        ServiceResponse serviceResponse =
            prism.getFeedHelper().update(inputFeed, bundles[0].getInputFeedFromBundle());
        AssertUtil.assertSucceeded(serviceResponse);
        //Since we have not created the availability flag on HDFS, the feed instance status
        //should be partial.
        r = prism.getFeedHelper()
            .getFeedInstanceListing(Util.readEntityName(bundles[0].getDataSets().get(0)),
                "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
        validateResponse(r, 5, 0, 0, 5, 0);
    }

    /**
     * Data is created for the feed, so the instance status is available. Then the data path is
     * changed and the feed is updated. Since no data exists at the new path, the instance
     * status should change to missing.
     */
    @Test
    public void testFeedListingAfterFeedDataPathUpdate() throws Exception {
        bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z");
        bundles[0].setProcessConcurrency(1);
        bundles[0].submitFeedsScheduleProcess(prism);
        InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
        List<List<String>> missingDependencies =
            OozieUtil.createMissingDependencies(cluster, EntityType.PROCESS, processName, 0);
        List<String> missingDependencyLastInstance =
            missingDependencies.get(missingDependencies.size() - 1);
        HadoopUtil.flattenAndPutDataInFolder(clusterFS, OSUtil.SINGLE_FILE,
            missingDependencyLastInstance);
        InstanceUtil.waitTillInstanceReachState(clusterOC, processName, 1,
            CoordinatorAction.Status.RUNNING, EntityType.PROCESS, 5);
        FeedInstanceResult r = prism.getFeedHelper()
            .getFeedInstanceListing(Util.readEntityName(bundles[0].getDataSets().get(0)),
                "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
        validateResponse(r, 5, 0, 0, 0, 5);
        String inputFeed = bundles[0].getInputFeedFromBundle();
        bundles[0].setInputFeedDataPath(baseTestDir + "/inputNew" + MINUTE_DATE_PATTERN);
        ServiceResponse serviceResponse =
            prism.getFeedHelper().update(inputFeed, bundles[0].getInputFeedFromBundle());
        AssertUtil.assertSucceeded(serviceResponse);
        //Since we have not created directories for the new path, the feed instance status
        //should be missing.
        r = prism.getFeedHelper()
            .getFeedInstanceListing(Util.readEntityName(bundles[0].getDataSets().get(0)),
                "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
        validateResponse(r, 5, 5, 0, 0, 0);
    }

    /**
     * Submit the feeds on prism and request the instance status on the server. The request
     * should succeed.
     */
    @Test
    public void testFeedListingFeedSubmitOnPrismRequestOnServer() throws Exception {
        bundles[0].setProcessValidity("2010-01-02T01:00Z", "2010-01-02T01:21Z");
        bundles[0].setProcessConcurrency(1);
        bundles[0].submitFeedsScheduleProcess(prism);
        InstanceUtil.waitTillInstancesAreCreated(clusterOC, bundles[0].getProcessData(), 0);
        FeedInstanceResult r = cluster.getFeedHelper()
            .getFeedInstanceListing(Util.readEntityName(bundles[0].getDataSets().get(0)),
                "?start=2010-01-02T01:00Z&end=2010-01-02T01:21Z");
        validateResponse(r, 5, 5, 0, 0, 0);
    }

    /**
     * Checks that the actual number of instances with each status equals the expected number
     * of instances with that status.
     *
     * @param instancesResult response from the API containing information about instances.
     *                        Each count parameter below gives the expected number of instances
     *                        with the corresponding status.
     * @param totalCount     total number of instances.
     * @param missingCount   number of missing instances.
     * @param emptyCount     number of empty instances.
     * @param partialCount   number of partial instances.
     * @param availableCount number of available instances.
     */
    private void validateResponse(FeedInstanceResult instancesResult, int totalCount,
                                  int missingCount, int emptyCount, int partialCount,
                                  int availableCount) {
        FeedInstanceResult.Instance[] instances = instancesResult.getInstances();
        LOGGER.info("instances: " + Arrays.toString(instances));
        Assert.assertNotNull(instances, "instances should not be null");
        Assert.assertEquals(instances.length, totalCount, "Total Instances");
        List<String> statuses = new ArrayList<>();
        for (FeedInstanceResult.Instance instance : instances) {
            Assert.assertNotNull(instance.getCluster());
            Assert.assertNotNull(instance.getInstance());
            Assert.assertNotNull(instance.getStatus());
            Assert.assertNotNull(instance.getUri());
            Assert.assertNotNull(instance.getCreationTime());
            Assert.assertNotNull(instance.getSize());
            final String status = instance.getStatus();
            LOGGER.info("status: " + status + ", instance: " + instance.getInstance());
            statuses.add(status);
        }
        Assert.assertEquals(Collections.frequency(statuses, "MISSING"),
            missingCount, "Missing Instances");
        Assert.assertEquals(Collections.frequency(statuses, "EMPTY"),
            emptyCount, "Empty Instances");
        Assert.assertEquals(Collections.frequency(statuses, "PARTIAL"),
            partialCount, "Partial Instances");
        Assert.assertEquals(Collections.frequency(statuses, "AVAILABLE"),
            availableCount, "Available Instances");
    }
}