package hip.ch8;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.Arrays;
public final class SimpleMovingAverage {
/**
 * Command-line entry point. Every argument except the last is treated as an
 * input path; the last argument is the output path.
 *
 * @param args one or more input paths followed by a single output path
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws Exception if {@link #runJob(String[], String)} fails
 */
public static void main(String... args) throws Exception {
  // Guard first: with no arguments the index arithmetic below would fail
  // with an opaque range error, and with a single argument the job would be
  // launched with an empty input list.
  if (args == null || args.length < 2) {
    throw new IllegalArgumentException(
        "Usage: SimpleMovingAverage <input path>... <output path>");
  }
  int outputIndex = args.length - 1;
  runJob(Arrays.copyOfRange(args, 0, outputIndex), args[outputIndex]);
}
/**
 * Configures and runs the MapReduce job, blocking until it finishes.
 *
 * <p>Anything already present at {@code output} is deleted recursively
 * before the job is submitted.
 *
 * <p>NOTE(review): the job is wired with {@code Map}, which emits
 * {@code Text} values, and {@code Reduce}, which is declared to consume
 * {@code DoubleWritable} values. These types do not line up; confirm which
 * mapper this job is meant to use.
 *
 * @param input  input paths; joined with commas and passed to FileInputFormat
 * @param output output directory for the job
 * @throws IOException if the job runs but does not complete successfully
 * @throws Exception   if the job cannot be configured or submitted
 */
public static void runJob(String[] input, String output)
    throws Exception {
  Configuration conf = new Configuration();

  Job job = new Job(conf);
  job.setJarByClass(SimpleMovingAverage.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  // Declare the final output types explicitly so they match what Reduce is
  // declared to emit instead of relying on the framework defaults.
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  Path outputPath = new Path(output);
  FileInputFormat.setInputPaths(job, StringUtils.join(input, ","));
  FileOutputFormat.setOutputPath(job, outputPath);

  // Clear any previous output so the job does not reject an existing path.
  outputPath.getFileSystem(conf).delete(outputPath, true);

  // Surface a failed job to the caller rather than returning as if it
  // had succeeded.
  if (!job.waitForCompletion(true)) {
    throw new IOException("SimpleMovingAverage job failed");
  }
}
/**
 * Mapper that splits each input value on whitespace and emits every token
 * paired with the name of the file backing the current input split.
 */
public static class Map
    extends Mapper<LongWritable, Text, Text, Text> {

  /** Name of the file behind this task's split; assigned in setup. */
  private Text sourceName;

  /** Reusable output key, overwritten for each token. */
  private final Text token = new Text();

  /**
   * Records the file name of the current input split.
   *
   * @param context task context; its input split is cast to FileSplit
   */
  @Override
  protected void setup(Context context) {
    FileSplit split = (FileSplit) context.getInputSplit();
    sourceName = new Text(split.getPath().getName());
  }

  /**
   * Emits one (token, file name) pair per token found in the value.
   *
   * @param key     input key; not used
   * @param value   text to tokenize
   * @param context sink for the emitted pairs
   */
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] tokens = StringUtils.split(value.toString());
    for (int i = 0; i < tokens.length; i++) {
      token.set(tokens[i]);
      context.write(token, sourceName);
    }
  }
}
public static class Reduce
extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {
DoubleWritable outValue = new DoubleWritable();
public void reduce(Text key, Iterable<DoubleWritable> values,
Context context)
throws IOException, InterruptedException {
double total = 0;
int instances = 0;
for (DoubleWritable stockPrice : values) {
total += stockPrice.get();
instances++;
}
outValue.set(total / (double) instances);
context.write(key, outValue);
}
}
public static class Reduce2
extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {
SMA sma = new SMA();
DoubleWritable outValue = new DoubleWritable();
public void reduce(Text key, Iterable<DoubleWritable> values,
Context context)
throws IOException, InterruptedException {
sma.reset();
for (DoubleWritable stockPrice : values) {
sma.add(stockPrice.get());
}
outValue.set(sma.calculate());
context.write(key, outValue);
}
}
/**
 * Running-average accumulator: keeps a sum and a count of the values added
 * since construction or the last {@link #reset()}.
 */
public static class SMA {

  /** Sum of all values added since the last reset. */
  protected double total = 0;

  /** Number of values added since the last reset. */
  protected int instances = 0;

  /** Clears the sum and the count so the accumulator can be reused. */
  public void reset() {
    instances = 0;
    total = 0;
  }

  /**
   * Adds one value to the running sum and increments the count.
   *
   * @param value value to include in the average
   */
  public void add(double value) {
    total = total + value;
    instances += 1;
  }

  /**
   * Returns the sum divided by the count.
   *
   * @return the mean of the added values; NaN when nothing has been added,
   *         because the division is then 0.0 / 0
   */
  public double calculate() {
    double count = instances;
    return total / count;
  }
}
}