/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.runtime.batch.dataset.output;

import co.cask.cdap.api.ProgramLifecycle;
import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.data.batch.Input;
import co.cask.cdap.api.data.batch.Output;
import co.cask.cdap.api.dataset.lib.FileSetArguments;
import co.cask.cdap.api.dataset.lib.FileSetProperties;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;
import co.cask.cdap.api.mapreduce.MapReduceTaskContext;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * App used to test whether M/R can write to multiple outputs. Tests writing to the same dataset,
 * with different runtime arguments, as two different outputs.
 */
public class AppWithMapReduceUsingMultipleOutputs extends AbstractApplication {

  public static final String PURCHASES = "purchases";
  public static final String SEPARATED_PURCHASES = "smallPurchases";

  @Override
  public void configure() {
    setName("AppWithMapReduceUsingMultipleOutputs");
    setDescription("Application with MapReduce job using multiple outputs");
    createDataset(PURCHASES, "fileSet", FileSetProperties.builder()
      .setInputFormat(TextInputFormat.class)
      .build());
    createDataset(SEPARATED_PURCHASES, "fileSet", FileSetProperties.builder()
      .setOutputFormat(TextOutputFormat.class)
      // note: SEPERATOR is the actual (misspelled) constant name in Hadoop's TextOutputFormat
      .setOutputProperty(TextOutputFormat.SEPERATOR, " ")
      .build());
    addMapReduce(new SeparatePurchases());
    addMapReduce(new InvalidMapReduce());
  }

  /**
   * Simple map-only MR that writes each record to one of two outputs, depending on the spend amount.
   */
  public static class SeparatePurchases extends AbstractMapReduce {
    @Override
    public void beforeSubmit(MapReduceContext context) throws Exception {
      // read the "inputFile" path of the purchases FileSet
      Map<String, String> inputArgs = new HashMap<>();
      FileSetArguments.setInputPath(inputArgs, "inputFile");
      context.addInput(Input.ofDataset(PURCHASES, inputArgs), FileMapper.class);

      // add the same dataset as two different outputs, with different aliases and output paths;
      // the mapper addresses each output by its alias
      Map<String, String> output1Args = new HashMap<>();
      FileSetArguments.setOutputPath(output1Args, "small_purchases");
      context.addOutput(Output.ofDataset(SEPARATED_PURCHASES, output1Args).alias("small_purchases"));

      Map<String, String> output2Args = new HashMap<>();
      FileSetArguments.setOutputPath(output2Args, "large_purchases");
      context.addOutput(Output.ofDataset(SEPARATED_PURCHASES, output2Args).alias("large_purchases"));

      Job job = context.getHadoopJob();
      job.setMapperClass(FileMapper.class);
      job.setNumReduceTasks(0);
    }
  }

  /**
   * This is an invalid MR because it adds an output a second time, with the same alias.
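   * The {@code small_purchases} alias is already registered by {@link SeparatePurchases#beforeSubmit},
   * so adding it again is expected to be rejected when the job is submitted.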
   */
  public static class InvalidMapReduce extends SeparatePurchases {
    @Override
    public void beforeSubmit(MapReduceContext context) throws Exception {
      super.beforeSubmit(context);
      context.addOutput(Output.ofDataset(SEPARATED_PURCHASES).alias("small_purchases"));
    }
  }

  public static class FileMapper extends Mapper<LongWritable, Text, LongWritable, Text>
    implements ProgramLifecycle<MapReduceTaskContext<NullWritable, Text>> {

    private MapReduceTaskContext<NullWritable, Text> mapReduceTaskContext;

    @Override
    public void initialize(MapReduceTaskContext<NullWritable, Text> context) throws Exception {
      this.mapReduceTaskContext = context;
    }

    @Override
    public void map(LongWritable key, Text data, Context context) throws IOException, InterruptedException {
      // each input line is expected to look like "<item> <amount>"; purchases over 50 go to the large output
      String spend = data.toString().split(" ")[1];
      String output = Integer.parseInt(spend) > 50 ? "large_purchases" : "small_purchases";
      mapReduceTaskContext.write(output, NullWritable.get(), data);
    }

    @Override
    public void destroy() {
      // no-op
    }
  }
}