/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.falcon.regression;
import org.apache.commons.lang.StringUtils;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.Frequency;
import org.apache.falcon.entity.v0.feed.LocationType;
import org.apache.falcon.regression.Entities.FeedMerlin;
import org.apache.falcon.regression.core.bundle.Bundle;
import org.apache.falcon.regression.core.helpers.ColoHelper;
import org.apache.falcon.regression.core.util.HCatUtil;
import org.apache.falcon.regression.core.util.AssertUtil;
import org.apache.falcon.regression.core.util.TimeUtil;
import org.apache.falcon.regression.core.util.HadoopUtil;
import org.apache.falcon.regression.core.util.BundleUtil;
import org.apache.falcon.regression.core.util.InstanceUtil;
import org.apache.falcon.regression.core.util.OSUtil;
import org.apache.falcon.regression.testHelper.BaseTestClass;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hive.hcatalog.api.HCatClient;
import org.apache.hive.hcatalog.api.HCatCreateTableDesc;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.log4j.Logger;
import org.apache.oozie.client.CoordinatorAction;
import org.apache.oozie.client.OozieClient;
import org.joda.time.format.DateTimeFormat;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
/**
 * Test where a single Oozie workflow contains multiple actions (hive, mr, pig).
 */
@Test(groups = "embedded")
public class CombinedActionsTest extends BaseTestClass {

    private ColoHelper cluster = servers.get(0);
    private FileSystem clusterFS = serverFS.get(0);
    private OozieClient clusterOC = serverOC.get(0);
    private HCatClient clusterHC;

    // HDFS locations backing the HCatalog (hive) input/output tables.
    private final String hiveTestDir = "/HiveData";
    private final String baseTestHDFSDir = cleanAndGetTestDir() + hiveTestDir;
    private final String inputHDFSDir = baseTestHDFSDir + "/input";
    private final String outputHDFSDir = baseTestHDFSDir + "/output";
    private String aggregateWorkflowDir = cleanAndGetTestDir() + "/aggregator";

    private static final Logger LOGGER = Logger.getLogger(CombinedActionsTest.class);
    // Local test resources holding the HCat sample data to upload.
    private static final String HCATDIR = OSUtil.concat("src", "test", "resources", "hcat");
    private static final String LOCALHCATDATA = OSUtil.concat(HCATDIR, "data");

    // Schema of the HCat tables created for the hive part of the workflow.
    public static final String DBNAME = "default";
    public static final String COL1NAME = "id";
    public static final String COL2NAME = "value";
    public static final String PARTITIONCOLUMN = "dt";

    private final String inputTableName = "combinedactionstest_input_table";
    private final String outputTableName = "combinedactionstest_output_table";

    // HDFS locations (with Falcon time-pattern placeholders) for the pig/mr feeds.
    private String pigMrTestDir = cleanAndGetTestDir() + "/pigMrData";
    private String inputPath = pigMrTestDir + "/input/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}";
    private String outputPathPig = pigMrTestDir + "/output/pig/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}";
    private String outputPathMr = pigMrTestDir + "/output/mr/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}";

    /**
     * Uploads the combined-actions Oozie workflow to HDFS once for the whole class.
     *
     * @throws Exception if the workflow upload fails
     */
    @BeforeClass(alwaysRun = true)
    public void uploadWorkflow() throws Exception {
        HadoopUtil.uploadDir(clusterFS, aggregateWorkflowDir, OSUtil.OOZIE_COMBINED_ACTIONS);
    }

    /**
     * Prepares each test: reads the combined-actions bundle, binds it to the test
     * cluster, makes its entity names unique, and submits the cluster entity.
     *
     * @param method the test method about to run (used for logging only)
     * @throws Exception if bundle preparation or cluster submission fails
     */
    @BeforeMethod(alwaysRun = true)
    public void setUp(Method method) throws Exception {
        LOGGER.info("test name: " + method.getName());
        clusterHC = cluster.getClusterHelper().getHCatClient();
        bundles[0] = BundleUtil.readCombinedActionsBundle();
        bundles[0] = new Bundle(bundles[0], cluster);
        bundles[0].generateUniqueBundle(this);
        AssertUtil.assertSucceeded(prism.getClusterHelper().submitEntity(bundles[0].getClusters().get(0)));
    }

    /**
     * Cleans up after each test: removes submitted entities, drops the HCat
     * tables (ignoring absence), and deletes the pig/mr data directory.
     *
     * @throws Exception if cleanup fails
     */
    @AfterMethod(alwaysRun = true)
    public void tearDown() throws Exception {
        removeTestClassEntities();
        // Third argument 'true' makes the drop a no-op if the table doesn't exist.
        clusterHC.dropTable(DBNAME, inputTableName, true);
        clusterHC.dropTable(DBNAME, outputTableName, true);
        HadoopUtil.deleteDirIfExists(pigMrTestDir, clusterFS);
    }

    /**
     * Schedule a process, for which the oozie workflow contains multiple actions like hive, mr, pig.
     * The process should succeed. Fails right now due to:
     * https://issues.apache.org/jira/browse/FALCON-670
     *
     * @throws Exception on any setup, submission, or scheduling failure
     */
    @Test
    public void combinedMrPigHiveAction() throws Exception {
        //create data for pig, mr and hcat jobs
        final String startDate = "2010-01-01T20:00Z";
        final String endDate = "2010-01-02T04:00Z";

        String inputFeedMrPig = bundles[0].getFeed("sampleFeed1");
        FeedMerlin feedObj = new FeedMerlin(inputFeedMrPig);

        // Upload minute-granularity data for the pig/mr part of the workflow.
        HadoopUtil.deleteDirIfExists(pigMrTestDir + "/input", clusterFS);
        List<String> dataDates = TimeUtil.getMinuteDatesOnEitherSide(startDate, endDate, 20);
        HadoopUtil.flattenAndPutDataInFolder(clusterFS, OSUtil.concat(OSUtil.NORMAL_INPUT, pigMrTestDir, "input"),
            dataDates);

        // Upload hourly, dash-separated-date partitions for the hcat part.
        final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH", "mm"}, "-");
        dataDates = TimeUtil.getMinuteDatesOnEitherSide(startDate, endDate, 60, DateTimeFormat.forPattern(datePattern));
        final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, LOCALHCATDATA,
            inputHDFSDir, dataDates);

        // Create external HCat input/output tables partitioned on PARTITIONCOLUMN.
        ArrayList<HCatFieldSchema> cols = new ArrayList<>();
        cols.add(HCatUtil.getStringSchema(COL1NAME, COL1NAME + " comment"));
        cols.add(HCatUtil.getStringSchema(COL2NAME, COL2NAME + " comment"));
        ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>();
        partitionCols.add(HCatUtil.getStringSchema(PARTITIONCOLUMN, PARTITIONCOLUMN + " partition"));

        clusterHC.createTable(HCatCreateTableDesc
            .create(DBNAME, inputTableName, cols)
            .partCols(partitionCols)
            .ifNotExists(true)
            .isTableExternal(true)
            .location(inputHDFSDir)
            .build());

        clusterHC.createTable(HCatCreateTableDesc
            .create(DBNAME, outputTableName, cols)
            .partCols(partitionCols)
            .ifNotExists(true)
            .isTableExternal(true)
            .location(outputHDFSDir)
            .build());

        // Use the declared constant instead of a hard-coded "dt" literal.
        HCatUtil.addPartitionsToTable(clusterHC, dataDates, dataset, PARTITIONCOLUMN, DBNAME, inputTableName);

        final String tableUriPartitionFragment = StringUtils.join(
            new String[]{"#" + PARTITIONCOLUMN + "=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}", "${MINUTE}"}, "-");
        String inputTableUri =
            "catalog:" + DBNAME + ":" + inputTableName + tableUriPartitionFragment;
        String outputTableUri =
            "catalog:" + DBNAME + ":" + outputTableName + tableUriPartitionFragment;

        //Set input and output feeds for bundle
        //input feed for both mr and pig jobs
        feedObj.setLocation(LocationType.DATA, inputPath);
        LOGGER.info(feedObj.toString());
        AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString()));

        //output feed for pig jobs
        String outputFeedPig = bundles[0].getFeed("sampleFeed2");
        feedObj = new FeedMerlin(outputFeedPig);
        feedObj.setLocation(LocationType.DATA, outputPathPig);
        feedObj.setFrequency(new Frequency("5", Frequency.TimeUnit.minutes));
        AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString()));

        //output feed for mr jobs
        String outputFeedMr = bundles[0].getFeed("sampleFeed3");
        feedObj = new FeedMerlin(outputFeedMr);
        feedObj.setLocation(LocationType.DATA, outputPathMr);
        AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString()));

        //input feed for hcat jobs
        String inputHive = bundles[0].getFeed("sampleFeedHCat1");
        feedObj = new FeedMerlin(inputHive);
        feedObj.getTable().setUri(inputTableUri);
        feedObj.setFrequency(new Frequency("1", Frequency.TimeUnit.hours));
        feedObj.getClusters().getClusters().get(0).getValidity()
            .setStart(TimeUtil.oozieDateToDate(startDate).toDate());
        feedObj.getClusters().getClusters().get(0).getValidity()
            .setEnd(TimeUtil.oozieDateToDate(endDate).toDate());
        AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString()));

        //output feed for hcat jobs
        String outputHive = bundles[0].getFeed("sampleFeedHCat2");
        feedObj = new FeedMerlin(outputHive);
        feedObj.getTable().setUri(outputTableUri);
        feedObj.setFrequency(new Frequency("1", Frequency.TimeUnit.hours));
        feedObj.getClusters().getClusters().get(0).getValidity()
            .setStart(TimeUtil.oozieDateToDate(startDate).toDate());
        feedObj.getClusters().getClusters().get(0).getValidity()
            .setEnd(TimeUtil.oozieDateToDate(endDate).toDate());
        AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString()));

        // Schedule the process and wait for its first instance to succeed.
        bundles[0].setProcessWorkflow(aggregateWorkflowDir);
        bundles[0].setProcessValidity(startDate, endDate);
        bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours);
        bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)");
        AssertUtil.assertSucceeded(prism.getProcessHelper().submitAndSchedule(bundles[0].getProcessData()));
        InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(),
            1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);
    }
}