/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap;

import co.cask.cdap.api.ProgramLifecycle;
import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;
import co.cask.cdap.api.spark.AbstractSpark;
import co.cask.cdap.api.spark.JavaSparkExecutionContext;
import co.cask.cdap.api.spark.JavaSparkMain;
import co.cask.cdap.api.workflow.AbstractWorkflow;
import co.cask.cdap.api.workflow.Value;
import co.cask.cdap.api.workflow.WorkflowInfo;
import co.cask.cdap.api.workflow.WorkflowToken;

import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

/**
 * Application to test the put operation on the WorkflowToken from the map and reduce methods,
 * as well as from Spark closures. Also used to test the workflow run id in MapReduce programs
 * that run inside a workflow.
 */
public class WorkflowTokenTestPutApp extends AbstractApplication {
  public static final String NAME = "WorkflowTokenTestPutApp";

  @Override
  public void configure() {
    setName(NAME);
    setDescription("Application to test the put operation on the WorkflowToken in the beforeSubmit, " +
                     "onFinish, map, and reduce methods of the MapReduce program.");
    addMapReduce(new RecordCounter());
    addSpark(new SparkTestApp());
    addWorkflow(new WorkflowTokenTestPut());
  }

  /**
   * Workflow that runs the MapReduce program followed by the Spark program.
   */
  public static class WorkflowTokenTestPut extends AbstractWorkflow {
    public static final String NAME = "WorkflowTokenTestPut";

    @Override
    protected void configure() {
      setName(NAME);
      addMapReduce(RecordCounter.NAME);
      addSpark(SparkTestApp.NAME);
    }
  }
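  // Summary of the WorkflowToken contract the programs below exercise: puts succeed
  // from the MapReduce driver hooks (beforeSubmit/onFinish) and from the Spark driver,
  // while puts attempted from Mapper/Reducer tasks or from Spark closures throw
  // UnsupportedOperationException, which the tasks catch and treat as the expected outcome.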
  /**
   * MapReduce program to count the occurrences of each ID in the input.
   */
  public static final class RecordCounter extends AbstractMapReduce {
    public static final String NAME = "RecordCounter";

    @Override
    public void configure() {
      setName(NAME);
      setDescription("MapReduce program to verify the records in the file.");
    }

    @Override
    public void beforeSubmit(MapReduceContext context) throws Exception {
      Job job = context.getHadoopJob();
      job.setMapperClass(MyMapper.class);
      job.setReducerClass(MyReducer.class);

      String inputPath = context.getRuntimeArguments().get("inputPath");
      String outputPath = context.getRuntimeArguments().get("outputPath");
      Preconditions.checkNotNull(inputPath, "Runtime argument 'inputPath' must be set.");
      Preconditions.checkNotNull(outputPath, "Runtime argument 'outputPath' must be set.");
      FileInputFormat.addInputPath(job, new Path(inputPath));
      FileOutputFormat.setOutputPath(job, new Path(outputPath));

      // Put something in the token. The token is null when the program runs outside a workflow.
      WorkflowToken workflowToken = context.getWorkflowToken();
      if (workflowToken == null) {
        return;
      }
      workflowToken.put("action.type", "MapReduce");
      workflowToken.put("start.time", Value.of(System.currentTimeMillis()));
      WorkflowInfo workflowInfo = context.getWorkflowInfo();
      Preconditions.checkNotNull(workflowInfo);
      workflowToken.put("wf.runid", workflowInfo.getRunId().getId());
    }

    @Override
    public void onFinish(boolean succeeded, MapReduceContext context) throws Exception {
      WorkflowToken workflowToken = context.getWorkflowToken();
      if (workflowToken == null) {
        return;
      }
      workflowToken.put("end.time", Value.of(System.currentTimeMillis()));
      WorkflowInfo workflowInfo = context.getWorkflowInfo();
      Preconditions.checkNotNull(workflowInfo);
      Preconditions.checkArgument(workflowInfo.getRunId().getId()
                                    .equals(workflowToken.get("wf.runid").toString()));
    }
  }

  /**
   * Mapper class to parse the input and emit the ID and value.
   */
  public static class MyMapper extends Mapper<LongWritable, Text, Text, Text>
    implements ProgramLifecycle<MapReduceContext> {
    private WorkflowToken workflowToken;

    @Override
    public void initialize(MapReduceContext context) throws Exception {
      workflowToken = context.getWorkflowToken();
      Preconditions.checkNotNull(workflowToken, "WorkflowToken cannot be null.");
      Preconditions.checkArgument(workflowToken.get("action.type").toString().equals("MapReduce"));
      WorkflowInfo workflowInfo = context.getWorkflowInfo();
      Preconditions.checkNotNull(workflowInfo);
      Preconditions.checkArgument(workflowInfo.getRunId().getId()
                                    .equals(workflowToken.get("wf.runid").toString()));
      try {
        // The token is read-only inside Mapper tasks; this put must fail.
        workflowToken.put("mapper.initialize.key", "mapper.initialize.value");
        throw new IllegalStateException("Expected exception from workflowToken.put() in Mapper.initialize()");
      } catch (UnsupportedOperationException e) {
        // expected
      }
    }

    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
      Preconditions.checkArgument(workflowToken.get("action.type").toString().equals("MapReduce"));
      try {
        workflowToken.put("map.key", "map.value");
        throw new IllegalStateException("Expected exception from workflowToken.put() in Mapper.map()");
      } catch (UnsupportedOperationException e) {
        // expected
      }
      // Each input line is expected to be of the form "<id>:<value>".
      String[] fields = value.toString().split(":");
      context.write(new Text(fields[0]), new Text(fields[1]));
    }

    @Override
    public void destroy() {
      // no-op
    }
  }
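  // Worked example (hypothetical data) of the flow through RecordCounter:
  //   MyMapper:  input line "1:one"                 -> emits ("1", "one")
  //   MyReducer: values ["one", "uno"] for key "1"  -> emits ("1", 2)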
  /**
   * Reducer class to count the occurrences of each ID.
   */
  public static class MyReducer extends Reducer<Text, Text, Text, IntWritable>
    implements ProgramLifecycle<MapReduceContext> {
    private WorkflowToken workflowToken;

    @Override
    public void initialize(MapReduceContext context) throws Exception {
      workflowToken = context.getWorkflowToken();
      Preconditions.checkNotNull(workflowToken, "WorkflowToken cannot be null.");
      Preconditions.checkArgument(workflowToken.get("action.type").toString().equals("MapReduce"));
      WorkflowInfo workflowInfo = context.getWorkflowInfo();
      Preconditions.checkNotNull(workflowInfo);
      Preconditions.checkArgument(workflowInfo.getRunId().getId()
                                    .equals(workflowToken.get("wf.runid").toString()));
      try {
        // The token is read-only inside Reducer tasks; this put must fail.
        workflowToken.put("reducer.initialize.key", "reducer.initialize.value");
        throw new IllegalStateException("Expected exception from workflowToken.put() in Reducer.initialize()");
      } catch (UnsupportedOperationException e) {
        // expected
      }
    }

    @Override
    public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
      Preconditions.checkArgument(workflowToken.get("action.type").toString().equals("MapReduce"));
      try {
        workflowToken.put("reduce.key", "reduce.value");
        throw new IllegalStateException("Expected exception from workflowToken.put() in Reducer.reduce()");
      } catch (UnsupportedOperationException e) {
        // expected
      }
      // Emit the number of values observed for this ID.
      context.write(key, new IntWritable(Iterables.size(values)));
    }

    @Override
    public void destroy() {
      // no-op
    }
  }

  /**
   * Spark application to test the put on the WorkflowToken.
   */
  public static class SparkTestApp extends AbstractSpark {
    public static final String NAME = "SparkTestApp";

    @Override
    public void configure() {
      setName(NAME);
      setDescription("Test Spark with the Workflow");
      setMainClass(SparkTestProgram.class);
    }
  }

  public static class SparkTestProgram implements JavaSparkMain {
    @Override
    public void run(JavaSparkExecutionContext sec) throws Exception {
      JavaSparkContext jsc = new JavaSparkContext();
      List<Integer> data = Arrays.asList(1, 2, 3, 4, 5);

      // The token is writable in the Spark driver, but not inside closures that run in executors.
      final WorkflowToken workflowToken = sec.getWorkflowToken();
      if (workflowToken != null) {
        workflowToken.put("multiplier", "2");
      }

      JavaRDD<Integer> distData = jsc.parallelize(data);
      JavaRDD<Integer> mapData = distData.map(new Function<Integer, Integer>() {
        @Override
        public Integer call(Integer val) throws Exception {
          if (workflowToken == null) {
            // Not running inside a workflow; nothing to verify.
            return val;
          }
          try {
            workflowToken.put("some.key", "some.value");
            throw new IllegalStateException("Expected exception from workflowToken.put() in Spark closure");
          } catch (UnsupportedOperationException e) {
            // expected
          }
          if (workflowToken.get("multiplier") != null) {
            int multiplier = workflowToken.get("multiplier").getAsInt();
            return multiplier * val;
          }
          return val;
        }
      });
      mapData.collect();
    }
  }
}
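// A minimal sketch of driving this app from a unit test, assuming the CDAP test
// framework (co.cask.cdap.test); the paths, timeout, and use of Guava's ImmutableMap
// are hypothetical illustrations, not part of this app:
//
//   ApplicationManager appManager = deployApplication(WorkflowTokenTestPutApp.class);
//   Map<String, String> args = ImmutableMap.of("inputPath", "/tmp/wf-input",
//                                              "outputPath", "/tmp/wf-output");
//   WorkflowManager wfManager = appManager.getWorkflowManager(WorkflowTokenTestPut.NAME);
//   wfManager.start(args);
//   wfManager.waitForFinish(5, TimeUnit.MINUTES);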