/*
 * Copyright © 2014-2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.runtime.batch;

import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.data.batch.Input;
import co.cask.cdap.api.data.batch.Output;
import co.cask.cdap.api.dataset.lib.FileSetProperties;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

/**
 * App used to test whether MapReduce programs can read from and write to file set datasets.
 */
public class AppWithMapReduceUsingFileSet extends AbstractApplication {

  // Dataset names are injected by the test through system properties,
  // which must be set before this class is loaded.
  public static String inputDataset = System.getProperty("INPUT_DATASET_NAME");
  public static String outputDataset = System.getProperty("OUTPUT_DATASET_NAME");

  @Override
  public void configure() {
    setName("AppWithMapReduceUsingFile");
    setDescription("Application with MapReduce job using file as dataset");
    // note: TextOutputFormat.SEPERATOR is the constant as spelled by Hadoop itself
    createDataset(inputDataset, "fileSet", FileSetProperties.builder()
      .setInputFormat(TextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
      .build());
    if (!outputDataset.equals(inputDataset)) {
      createDataset(outputDataset, "fileSet", FileSetProperties.builder()
        .setBasePath("foo/my-file-output")
        .setInputFormat(TextInputFormat.class)
        .setOutputFormat(TextOutputFormat.class)
        .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
        .build());
    }
    addMapReduce(new ComputeSum());
  }

  /**
   * MapReduce program that sums the numeric lines of the input file set
   * and writes the total to the output file set.
   */
  public static final class ComputeSum extends AbstractMapReduce {
    @Override
    public void beforeSubmit(MapReduceContext context) throws Exception {
      Job job = context.getHadoopJob();
      job.setReducerClass(FileReducer.class);
      // the mapper class can be set through the CDAP API instead of directly on the Hadoop job
      context.addInput(Input.ofDataset(inputDataset), FileMapper.class);
      context.addOutput(Output.ofDataset(outputDataset));
    }
  }

  /**
   * Mapper that parses each input line as a long and emits it under a single fixed key,
   * so that the reducer sees all values together.
   */
  public static class FileMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    public static final String ONLY_KEY = "x";

    @Override
    public void map(LongWritable key, Text data, Context context)
      throws IOException, InterruptedException {
      context.write(new Text(ONLY_KEY), new LongWritable(Long.parseLong(data.toString())));
    }
  }

  /**
   * Reducer that sums all values for a key and writes the total. The String/Long output
   * types rely on TextOutputFormat, which writes keys and values via toString().
   */
  public static class FileReducer extends Reducer<Text, LongWritable, String, Long> {
    @Override
    public void reduce(Text key, Iterable<LongWritable> values, Context context)
      throws IOException, InterruptedException {
      long sum = 0L;
      for (LongWritable value : values) {
        sum += value.get();
      }
      context.write(key.toString(), sum);
    }
  }
}
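
// Usage sketch (hypothetical, not part of this file's original source): in a CDAP
// unit test built on co.cask.cdap.test.TestBase, the app could be exercised along
// these lines. The dataset names "numbers" and "sums" are illustrative assumptions;
// the system properties must be set before the class above is loaded, since its
// static fields read them at class-load time.
//
//   System.setProperty("INPUT_DATASET_NAME", "numbers");
//   System.setProperty("OUTPUT_DATASET_NAME", "sums");
//   ApplicationManager appManager = deployApplication(AppWithMapReduceUsingFileSet.class);
//   appManager.getMapReduceManager(ComputeSum.class.getSimpleName()).start();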