/*
* Copyright © 2015-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.internal.app.runtime.batch;
import co.cask.cdap.api.ProgramLifecycle;
import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.batch.Input;
import co.cask.cdap.api.data.batch.Output;
import co.cask.cdap.api.dataset.lib.FileSetArguments;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.dataset.table.Put;
import co.cask.cdap.api.dataset.table.Table;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;
import co.cask.cdap.api.mapreduce.MapReduceTaskContext;
import com.google.common.collect.Maps;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.Map;
/**
* App used to test whether M/R can read from file datasets referenced at runtime.
*/
public class AppWithMapReduceUsingRuntimeDatasets extends AbstractApplication {
// Runtime-argument key holding the name of the input dataset (read in ComputeSum#beforeSubmit).
public static final String INPUT_NAME = "input.name";
// Runtime-argument key holding the input paths passed to FileSetArguments.setInputPaths.
public static final String INPUT_PATHS = "input.paths";
// Runtime-argument key holding the name of the output dataset (read in ComputeSum#beforeSubmit).
public static final String OUTPUT_NAME = "output.name";
// Runtime-argument key holding the output path passed to FileSetArguments.setOutputPath.
public static final String OUTPUT_PATH = "output.path";
// Name this application registers itself under in configure().
public static final String APP_NAME = "appWithRuntimeDS";
// Name the ComputeSum MapReduce program registers itself under.
public static final String MR_NAME = "computeSum";
// Key in the COUNTERS table that FileMapper increments once per map() call.
public static final byte[] INPUT_RECORDS = Bytes.toBytes("inputRecords");
// Key in the COUNTERS table that FileReducer increments once per reduce() call.
public static final byte[] REDUCE_KEYS = Bytes.toBytes("reduceKeys");
// Name of the KeyValueTable dataset created in configure() and used by the map and reduce tasks.
public static final String COUNTERS = "dynCounters";
/**
 * Configures the application: sets its name and description, registers the
 * {@link ComputeSum} MapReduce program and creates the two datasets used by it.
 */
@Override
public void configure() {
  setName(APP_NAME);
  setDescription("Application with MapReduce job using file as dataset");
  addMapReduce(new ComputeSum());

  // "rtt" is the Table that ComputeSum#beforeSubmit writes a row into.
  final String tableType = Table.class.getName();
  createDataset("rtt", tableType);

  // Key-value table in which the map and reduce tasks increment their counters.
  final String counterTableType = KeyValueTable.class.getName();
  createDataset(COUNTERS, counterTableType);
}
/**
 * MapReduce program whose input and output datasets are not fixed at configure time but
 * are named by runtime arguments ({@link #INPUT_NAME}, {@link #INPUT_PATHS},
 * {@link #OUTPUT_NAME} and {@link #OUTPUT_PATH}). It uses {@link FileMapper} and
 * {@link FileReducer} as its map and reduce classes.
 */
public static final class ComputeSum extends AbstractMapReduce {

  @Override
  protected void configure() {
    setName(MR_NAME);
  }

  /**
   * Sets the mapper and reducer classes on the Hadoop job, registers the input and output
   * datasets named in the runtime arguments, and writes one row to the "rtt" table.
   *
   * @param context the MapReduce context supplying the Hadoop job, the runtime arguments
   *                and access to datasets
   * @throws Exception as declared by the overridden method
   */
  @Override
  public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(FileMapper.class);
    job.setReducerClass(FileReducer.class);

    Map<String, String> runtimeArgs = context.getRuntimeArguments();
    String inputName = runtimeArgs.get(INPUT_NAME);
    String inputPaths = runtimeArgs.get(INPUT_PATHS);
    String outputName = runtimeArgs.get(OUTPUT_NAME);
    String outputPath = runtimeArgs.get(OUTPUT_PATH);

    // Setup input and output file sets. Each gets its own argument map: reusing one map
    // and clearing it in between is only correct if Input.ofDataset copies its arguments,
    // and would otherwise silently replace the input paths with the output path.
    Map<String, String> inputArgs = Maps.newHashMap();
    FileSetArguments.setInputPaths(inputArgs, inputPaths);
    context.addInput(Input.ofDataset(inputName, inputArgs));

    Map<String, String> outputArgs = Maps.newHashMap();
    FileSetArguments.setOutputPath(outputArgs, outputPath);
    context.addOutput(Output.ofDataset(outputName, outputArgs));

    // Write a fixed row ("a": "b" -> "c") to the "rtt" table.
    // NOTE(review): presumably lets a test verify that dataset writes made in
    // beforeSubmit are persisted - confirm against the test that uses this app.
    Table table = context.getDataset("rtt");
    table.put(new Put("a").add("b", "c"));
  }
}
/**
 * Mapper that parses the text of each input record as a long and emits it under the
 * single key {@link #ONLY_KEY}. Every call to {@code map} also increments the
 * {@code INPUT_RECORDS} entry of the {@code COUNTERS} dataset by one.
 */
public static class FileMapper
  extends Mapper<LongWritable, Text, Text, LongWritable>
  implements ProgramLifecycle<MapReduceTaskContext> {

  /** The one key under which every parsed value is emitted. */
  public static final String ONLY_KEY = "x";

  // Assigned in initialize(); map() dereferences it without a null check.
  private KeyValueTable counters = null;

  /**
   * Counts the record and emits its numeric value under {@link #ONLY_KEY}.
   *
   * @param key the input key; not used
   * @param data the input record, whose text must be parseable as a long
   * @param context the Hadoop context the output pair is written to
   * @throws NumberFormatException if {@code data} does not contain a parsable long
   */
  @Override
  public void map(LongWritable key, Text data, Context context)
    throws IOException, InterruptedException {
    counters.increment(INPUT_RECORDS, 1L);
    // parseLong returns the primitive directly; Long.valueOf would box a Long only for it
    // to be unboxed again by the LongWritable(long) constructor.
    long value = Long.parseLong(data.toString());
    context.write(new Text(ONLY_KEY), new LongWritable(value));
  }

  /**
   * Obtains the {@code COUNTERS} dataset from the task context.
   *
   * @param context the task context used to look up the dataset
   */
  @Override
  public void initialize(MapReduceTaskContext context) throws Exception {
    counters = context.getDataset(COUNTERS);
  }

  @Override
  public void destroy() {
    // no-op
  }
}
/**
 * Reducer that sums all values received for a key and writes the key (as a String)
 * together with the sum. Every call to {@code reduce} also increments the
 * {@code REDUCE_KEYS} entry of the {@code COUNTERS} dataset by one.
 */
public static class FileReducer
  extends Reducer<Text, LongWritable, String, Long>
  implements ProgramLifecycle<MapReduceTaskContext> {

  // Assigned in initialize(); reduce() dereferences it without a null check.
  private KeyValueTable counters = null;

  /**
   * Counts the key, adds up its values and writes the total.
   *
   * @param key the key being reduced; written out via {@code toString()}
   * @param values the values emitted for {@code key}
   * @param context the Hadoop context the output pair is written to
   */
  // @Override makes the compiler verify this really overrides Reducer#reduce; without it
  // a signature mismatch would silently leave the default reduce implementation in place.
  @Override
  public void reduce(Text key, Iterable<LongWritable> values, Context context)
    throws IOException, InterruptedException {
    counters.increment(REDUCE_KEYS, 1L);
    long sum = 0L;
    for (LongWritable value : values) {
      sum += value.get();
    }
    context.write(key.toString(), sum);
  }

  /**
   * Obtains the {@code COUNTERS} dataset from the task context.
   *
   * @param context the task context used to look up the dataset
   */
  @Override
  public void initialize(MapReduceTaskContext context) throws Exception {
    counters = context.getDataset(COUNTERS);
  }

  @Override
  public void destroy() {
    // no-op
  }
}
}