package edu.indiana.soic.ts.mapreduce;
import edu.indiana.soic.ts.utils.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Map;
public class HistogramGenerator {
private static final Logger LOG = LoggerFactory.getLogger(HistogramGenerator.class);
private double min;
private double max;
private int bins;
private String vectDir;
private String interHistDir;
private TSConfiguration tsConfiguration;
public void configure(String []args) {
String configFile = Utils.getConfigurationFile(args);
this.tsConfiguration = new TSConfiguration(configFile);
Map tsConf = tsConfiguration.getConf();
min = (double) tsConf.get(TSConfiguration.Histogram.MIN);
max = (double) tsConf.get(TSConfiguration.Histogram.MAX);
bins = (int) tsConf.get(TSConfiguration.Histogram.NO_OF_BINS);
this.interHistDir = tsConfiguration.getIntermediateHistDir();
this.vectDir = tsConfiguration.getVectorDir();
}
public int execJob(Configuration conf, String vectorFileFullPath, String vectorFile, String interHistDir) throws Exception {
LOG.info(vectorFileFullPath);
Job job = new Job(conf, "Pairwise-calc-" + vectorFile);
/* create the out dir for this job. Delete and recreates if it exists */
Path hdOutDir = new Path(interHistDir + "/" + vectorFile);
FileSystem fs = FileSystem.get(conf);
fs.delete(hdOutDir, true);
conf.set("mapreduce.output.textoutputformat.separator", ",");
Path hdInputDir = new Path(this.vectDir + "/" + vectorFile);
job.setJarByClass(HistogramGenerator.class);
job.setMapperClass(HistogramMapper.class);
job.setReducerClass(HistogramReducer.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(job, hdInputDir);
FileOutputFormat.setOutputPath(job, hdOutDir);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.getConfiguration().setDouble(TSConfiguration.Histogram.MIN, min);
job.getConfiguration().setDouble(TSConfiguration.Histogram.MAX, max);
job.getConfiguration().setInt(TSConfiguration.Histogram.NO_OF_BINS, bins);
long startTime = System.currentTimeMillis();
int exitStatus = job.waitForCompletion(true) ? 0 : 1;
double executionTime = (System.currentTimeMillis() - startTime) / 1000.0;
LOG.info("Job Finished in " + executionTime + " seconds");
return exitStatus;
}
public void submitJob() {
Configuration conf = new Configuration();
FileSystem fs;
try {
fs = FileSystem.get(conf);
FileStatus[] status = fs.listStatus(new Path(vectDir));
for (FileStatus statu : status) {
String sequenceFile = statu.getPath().getName();
String sequenceFileFullPath = vectDir + "/" + sequenceFile;
try {
execJob(conf, sequenceFileFullPath, sequenceFile, interHistDir);
Utils.concatOutput2(conf, sequenceFile, interHistDir + "/" + sequenceFile,
tsConfiguration.getHistDir(), tsConfiguration.getFixedClassFile());
} catch (Exception e) {
String message = "Failed to executed PWD calculation:" + sequenceFileFullPath + " " + interHistDir;
LOG.info(message, e);
throw new RuntimeException(message);
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public static class HistogramMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
private Bin[] bins;
@Override
protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);
Configuration conf = context.getConfiguration();
double min = conf.getDouble(TSConfiguration.Histogram.MIN, -1);
double max = conf.getDouble(TSConfiguration.Histogram.MAX, 1);
int noOfBins = conf.getInt(TSConfiguration.Histogram.NO_OF_BINS, 10);
this.bins = getBins(noOfBins, max, min);
}
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
VectorPoint p = Utils.parseVector(value.toString());
if (p != null) {
double d = vectorDelta(p.getNumbers());
// LOG.info("delta: {}", d);
int binIndex = getBinIndex(d, this.bins);
context.write(new IntWritable(binIndex), new Text(p.getSymbol()));
}
}
}
public static class HistogramReducer extends Reducer<IntWritable, Text, IntWritable, Text> {
private Bin[] bins;
@Override
protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);
Configuration conf = context.getConfiguration();
double min = conf.getDouble(TSConfiguration.Histogram.MIN, -1);
double max = conf.getDouble(TSConfiguration.Histogram.MAX, 1);
int noOfBins = conf.getInt(TSConfiguration.Histogram.NO_OF_BINS, 10);
this.bins = getBins(noOfBins, max, min);
}
public void reduce(IntWritable key, Iterable<Text> values,
Context context) throws IOException, InterruptedException {
StringBuilder sb = new StringBuilder();
Bin bin = this.bins[key.get()];
sb.append(bin.start).append(",").append(bin.end).append(",");
for (Text t : values) {
sb.append(t.toString()).append(",");
}
context.write(key, new Text(sb.toString()));
}
}
private static int getBinIndex(double val, Bin []bins) {
for (int i = 0; i < bins.length; i++) {
Bin b = bins[i];
// add all that is below the 0'th bin to 0
if (val < b.start) {
return i;
}
if (b.start <= val && b.end >= val) {
return i;
}
}
// add all that is over the last bin value to last
return bins.length - 1;
}
private static Bin[] getBins(int noOfBins, double max, double min) {
double delta = (max - min) / noOfBins;
Bin []bins = new Bin[noOfBins];
for (int i = 0; i < bins.length; i++) {
Bin b = new Bin();
b.start = min + i * delta;
b.end = min + (i + 1)* (delta);
bins[i] = b;
}
return bins;
}
private static double vectorDelta(double []n) {
double sum = 0.0;
for (double aN : n) {
sum += aN;
}
if (sum == 0) return .1;
double delta = n[n.length - 1] - n[0];
return delta * n.length / sum;
}
public static void main(String[] args) {
HistogramGenerator hist = new HistogramGenerator();
hist.configure(args);
hist.submitJob();
}
}