/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap;

import co.cask.cdap.api.annotation.Property;
import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;
import co.cask.cdap.api.spark.AbstractSpark;
import co.cask.cdap.api.spark.JavaSparkExecutionContext;
import co.cask.cdap.api.spark.JavaSparkMain;
import co.cask.cdap.api.spark.SparkClientContext;
import co.cask.cdap.api.workflow.AbstractWorkflow;
import co.cask.cdap.api.workflow.AbstractWorkflowAction;
import co.cask.cdap.api.workflow.NodeValue;
import co.cask.cdap.api.workflow.WorkflowContext;
import co.cask.cdap.api.workflow.WorkflowToken;
import co.cask.cdap.internal.app.runtime.batch.WordCount;
import co.cask.cdap.runtime.WorkflowTest;

import com.google.common.base.Preconditions;
import org.apache.hadoop.mapreduce.Job;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
* A workflow app used by {@link WorkflowTest} for testing.
*/
public class WorkflowApp extends AbstractApplication {
@Override
public void configure() {
setName("WorkflowApp");
setDescription("WorkflowApp");
addMapReduce(new WordCountMapReduce());
addSpark(new SparkWorkflowTestApp());
addWorkflow(new FunWorkflow());
  }

  /**
   * A workflow that runs the WordCount MapReduce program, a Spark program, and a custom verification action.
   */
public static class FunWorkflow extends AbstractWorkflow {
    public static final String NAME = "FunWorkflow";

@Override
public void configure() {
setName(NAME);
setDescription("FunWorkflow description");
addMapReduce("ClassicWordCount");
addSpark("SparkWorkflowTest");
addAction(new CustomAction("verify"));
}
  }

  /**
   * MapReduce program that runs the classic WordCount job from the Hadoop examples.
   */
public static final class WordCountMapReduce extends AbstractMapReduce {
    public static final String NAME = "ClassicWordCount";

@Override
public void configure() {
setName(NAME);
setDescription("WordCount job from Hadoop examples");
    }

@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
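      // Configure the Hadoop job with the input and output paths passed as runtime arguments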
Map<String, String> args = context.getRuntimeArguments();
String inputPath = args.get("inputPath");
String outputPath = args.get("outputPath");
WordCount.configureJob((Job) context.getHadoopJob(), inputPath, outputPath);
    }

@Override
public void onFinish(boolean succeeded, MapReduceContext context) throws Exception {
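      // Record this node's id under the "completed" key; the verify action counts these entries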
context.getWorkflowToken().put("completed", context.getWorkflowInfo().getNodeId());
}
}
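
  /**
   * Spark program specification for {@link SparkWorkflowTestProgram}, executed as a node of the workflow.
   */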
public static class SparkWorkflowTestApp extends AbstractSpark {
    public static final String NAME = "SparkWorkflowTest";

@Override
public void configure() {
setName(NAME);
setDescription("Test Spark with Workflow");
setMainClass(SparkWorkflowTestProgram.class);
    }

@Override
public void beforeSubmit(SparkClientContext context) throws Exception {
Preconditions.checkState(context.getWorkflowInfo() != null && context.getWorkflowToken() != null,
"WorkflowInfo and WorkflowToken shouldn't be null");
}
}
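
  /**
   * Main class of the Spark program. Verifies the MapReduce output and exercises the workflow token.
   */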
public static class SparkWorkflowTestProgram implements JavaSparkMain {
@Override
public void run(JavaSparkExecutionContext sec) throws Exception {
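      // The no-arg constructor picks up the SparkConf that CDAP prepares for this program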
JavaSparkContext jsc = new JavaSparkContext();
      // The MapReduce node runs first; its _SUCCESS marker must exist in the shared output directory
      File outputDir = new File(sec.getRuntimeArguments().get("outputPath"));
      File successFile = new File(outputDir, "_SUCCESS");
      Preconditions.checkState(successFile.exists(), "Missing _SUCCESS marker in %s", outputDir);
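      // Run a trivial job to verify that Spark executes correctly within the workflow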
List<Integer> data = Arrays.asList(1, 2, 3, 4, 5);
JavaRDD<Integer> distData = jsc.parallelize(data);
distData.collect();
// If there are problems accessing workflow token here, Spark will throw a NotSerializableException in the test
final WorkflowToken workflowToken = sec.getWorkflowToken();
if (workflowToken != null) {
workflowToken.put("otherKey", "otherValue");
}
      // collect() forces the transformation to actually run; map() itself already serializes the closure
      distData.map(new Function<Integer, Integer>() {
        @Override
        public Integer call(Integer val) throws Exception {
          if (workflowToken != null && workflowToken.get("tokenKey") != null) {
            return 2 * val;
          }
          return val;
        }
      }).collect();
      // Record this node's completion; the verify action expects entries from both the MapReduce and Spark nodes
      Preconditions.checkNotNull(workflowToken, "WorkflowToken shouldn't be null when run from a workflow");
      workflowToken.put("completed", sec.getWorkflowInfo().getNodeId());
}
  }

/**
* Action to test configurer-style action configuration, extending AbstractWorkflowAction.
*/
public static final class CustomAction extends AbstractWorkflowAction {
private static final Logger LOG = LoggerFactory.getLogger(CustomAction.class);
private final String name;
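    // @Property captures this field's value in the action specification so it is restored at runtime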
@Property
    private final boolean condition = true;

public CustomAction(String name) {
this.name = name;
    }

@Override
public void configure() {
setName(name);
setDescription(name);
    }

@Override
public void initialize(WorkflowContext context) throws Exception {
super.initialize(context);
LOG.info("Custom action initialized: " + context.getSpecification().getName());
WorkflowToken workflowToken = context.getToken();
// Token shouldn't be null
workflowToken.put("tokenKey", "value");
    }

@Override
public void destroy() {
super.destroy();
LOG.info("Custom action destroyed: " + getContext().getSpecification().getName());
}
@Override
public void run() {
LOG.info("Custom action run");
      File outputDir = new File(getContext().getRuntimeArguments().get("outputPath"));
      Preconditions.checkState(condition && new File(outputDir, "_SUCCESS").exists(),
                               "Expected the _SUCCESS marker in %s", outputDir);
      // There should be two values for the "completed" key: one from the MapReduce node, one from the Spark node
      List<NodeValue> values = getContext().getToken().getAll("completed");
      Preconditions.checkState(values.size() == 2, "Expected 2 'completed' values, found %s", values.size());
LOG.info("Custom run completed.");
}
}
}