package edu.indiana.soic.ts.mapreduce.postproc;
import edu.indiana.soic.ts.utils.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.util.HashMap;
import java.util.Map;
/**
 * Driver that runs one Hadoop MapReduce job per file found in the point directory.
 *
 * <p>For each file name, the job reads the same-named file from the vector directory
 * (through {@link VectorReadMapper}) and from the point directory (through
 * {@link PointReadMapper}). {@link LabelGeneratorReducer} pairs the symbol and the point
 * that share a key, assigns the point the class number looked up from the histogram file,
 * and writes {@code symbol -> serialized point}. After each successful job
 * {@code Utils.concatOutput} is invoked on the job's output directory.
 */
public class LabelGenerator {
    private static final Logger LOG = LoggerFactory.getLogger(LabelGenerator.class);

    /** Directory holding the point files; one job is run per file in it. */
    private String pointFileDir;
    /** Directory holding the vector files, matched to point files by file name. */
    private String vectorFileDir;
    /** Parent directory of the per-file job output directories. */
    private String interOutDir;
    private TSConfiguration tsConfiguration;

    /**
     * Command line entry point: loads the configuration, then runs the jobs.
     *
     * @param args command line arguments, handed to {@code Utils.getConfigurationFile}
     * @throws Exception if the configuration cannot be loaded
     */
    public static void main(String[] args) throws Exception {
        String configFile = Utils.getConfigurationFile(args);
        TSConfiguration tsConfiguration = new TSConfiguration(configFile);
        LabelGenerator labelGenerator = new LabelGenerator();
        labelGenerator.configure(tsConfiguration);
        labelGenerator.submitJob();
    }

    /**
     * Reads the directories this generator works on from the given configuration.
     *
     * @param tsConfiguration source of the intermediate label, vector and point directories
     */
    public void configure(TSConfiguration tsConfiguration) {
        this.tsConfiguration = tsConfiguration;
        this.interOutDir = tsConfiguration.getIntermediateLabelDir();
        this.vectorFileDir = tsConfiguration.getVectorDir();
        this.pointFileDir = tsConfiguration.getPointDir();
    }

    /**
     * Configures and runs the label generation job for a single file, blocking until it ends.
     *
     * @param conf     Hadoop configuration used for the job and for file system access
     * @param fileName name of the file to process; it is resolved against the vector
     *                 directory, the point directory and the intermediate output directory
     * @return {@code 0} if the job completed successfully, {@code 1} otherwise
     * @throws Exception if the job cannot be set up or submitted
     */
    public int execJob(Configuration conf, String fileName) throws Exception {
        Job job = new Job(conf, "Labelgen-" + fileName);
        Path vectorFilePath = new Path(this.vectorFileDir + "/" + fileName);
        Path pointFilePath = new Path(this.pointFileDir + "/" + fileName);

        // Remove any previous output directory for this file; the job creates it again.
        Path labelOutDir = new Path(this.interOutDir + "/" + fileName);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(labelOutDir, true);

        MultipleInputs.addInputPath(job, vectorFilePath, TextInputFormat.class, VectorReadMapper.class);
        MultipleInputs.addInputPath(job, pointFilePath, TextInputFormat.class, PointReadMapper.class);
        job.setJarByClass(LabelGenerator.class);
        job.setReducerClass(LabelGeneratorReducer.class);

        // The mappers emit (LongWritable, Text) while the reducer emits (Text, Text).
        // Without explicit map output classes Hadoop assumes the job output classes
        // and rejects the LongWritable keys with a type mismatch.
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileOutputFormat.setOutputPath(job, labelOutDir);
        job.setOutputFormatClass(TextOutputFormat.class);

        long startTime = System.currentTimeMillis();
        int exitStatus = job.waitForCompletion(true) ? 0 : 1;
        double executionTime = (System.currentTimeMillis() - startTime) / 1000.0;
        LOG.info("Job Finished in {} seconds", executionTime);
        return exitStatus;
    }

    /**
     * Runs {@link #execJob(Configuration, String)} for every entry of the point directory
     * and, after each successful job, calls {@code Utils.concatOutput} with the job's
     * output directory and the configured label directory.
     *
     * @throws RuntimeException if the point directory cannot be listed, or if a job or the
     *                          following concatenation step fails; processing stops at the
     *                          first failure
     */
    public void submitJob() {
        // NOTE(review): LabelGeneratorReducer reads TSConfiguration.Histogram.HISTO_FILE from
        // the job configuration, but nothing in this class sets it - confirm it is supplied
        // through the Hadoop configuration resources.
        Configuration conf = new Configuration();

        FileStatus[] pointFiles;
        try {
            FileSystem fs = FileSystem.get(conf);
            pointFiles = fs.listStatus(new Path(this.pointFileDir));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        if (pointFiles == null) {
            // Guard against file systems that report a missing directory as null.
            throw new IllegalStateException("Unable to list point directory: " + this.pointFileDir);
        }

        for (FileStatus pointFile : pointFiles) {
            String fileName = pointFile.getPath().getName();
            try {
                int exitStatus = execJob(conf, fileName);
                if (exitStatus != 0) {
                    // Do not concatenate the output of a job that did not complete.
                    throw new IOException("Label generation job exited with status " + exitStatus);
                }
                Utils.concatOutput(conf, fileName, this.interOutDir + "/" + fileName, tsConfiguration.getLabelDir());
            } catch (Exception e) {
                String message = "Failed to executed label generation:" + fileName;
                LOG.error(message, e);
                throw new RuntimeException(message, e);
            }
        }
    }

    /**
     * Parses each vector file line and emits the input key with the symbol prefixed by
     * {@code '#'}, which lets the reducer tell symbols apart from point lines.
     *
     * <p>Declared {@code static} because Hadoop creates task classes reflectively through a
     * no-argument constructor, which a non-static inner class does not have.
     */
    public static class VectorReadMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        /**
         * @throws RuntimeException if the line cannot be parsed or carries no symbol
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            VectorPoint p = Utils.parseVector(value.toString());
            if (p == null || p.getSymbol() == null) {
                String msg = "Invalid vector point";
                LOG.error(msg);
                throw new RuntimeException(msg);
            }
            context.write(key, new Text("#" + p.getSymbol()));
        }
    }

    /**
     * Passes every point file line through unchanged under its input key.
     *
     * <p>Declared {@code static} so Hadoop can instantiate it reflectively.
     */
    public static class PointReadMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        /**
         * @throws RuntimeException wrapping any failure raised while writing the record
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            try {
                context.write(key, value);
            } catch (Exception e) {
                String msg = "Failed to read the point";
                LOG.error(msg, e);
                throw new RuntimeException(msg, e);
            }
        }
    }

    /**
     * Joins the symbol and the point line that arrive under the same key and writes the
     * point, tagged with the class of its symbol, keyed by the symbol.
     *
     * <p>Declared {@code static} so Hadoop can instantiate it reflectively.
     */
    public static class LabelGeneratorReducer extends Reducer<LongWritable, Text, Text, Text> {
        /** Symbol to class number, loaded once per task from the histogram file. */
        private Map<String, Integer> symbolsToClass;

        /**
         * Loads the symbol to class mapping from the histogram file named in the job
         * configuration.
         *
         * @throws IOException if the histogram file is not configured or cannot be read
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);
            Configuration conf = context.getConfiguration();
            String histoFile = conf.get(TSConfiguration.Histogram.HISTO_FILE);
            if (histoFile == null) {
                throw new IOException("Histogram file is not configured: " + TSConfiguration.Histogram.HISTO_FILE);
            }
            FileSystem fs = FileSystem.get(conf);
            symbolsToClass = loadHistoSectors(fs, new Path(histoFile));
        }

        /**
         * Emits {@code symbol -> serialized point} when both a symbol and a point line were
         * received for the key; keys with only one of the two produce no output.
         *
         * @throws RuntimeException if the point line cannot be parsed or written
         */
        @Override
        protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // NOTE(review): both inputs use TextInputFormat, so the join key is the position
            // of the line within its file; this presumably relies on the vector and point
            // files being line-aligned at identical offsets - confirm.
            String symbol = null;
            String pointValue = null;
            for (Text t : values) {
                String ts = t.toString();
                if (ts.startsWith("#")) {
                    // '#' marks a value produced by VectorReadMapper.
                    symbol = ts.substring(1);
                } else {
                    pointValue = ts;
                }
            }
            if (symbol == null || pointValue == null) {
                return;
            }
            try {
                Point p = Utils.readPointWithoutSymbol(pointValue);
                p.setClazz(symbolsToClass.get(symbol));
                context.write(new Text(symbol), new Text(p.serialize()));
            } catch (Exception e) {
                String msg = "Failed to read point: " + pointValue;
                LOG.error(msg, e);
                throw new RuntimeException(msg, e);
            }
        }
    }

    /**
     * Reads the histogram file and maps every symbol of every bin to that bin's index
     * plus one. Lines for which {@code Utils.readBin} returns {@code null} are skipped.
     *
     * @param fs        file system used to open the file
     * @param histoFile path of the histogram file
     * @return the symbol to class number mapping
     * @throws IOException if the file cannot be opened or read; propagated so that a task
     *                     never continues with a partial or empty mapping
     */
    private static Map<String, Integer> loadHistoSectors(FileSystem fs, Path histoFile) throws IOException {
        Map<String, Integer> symbolToClass = new HashMap<>();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(histoFile)))) {
            String line;
            while ((line = br.readLine()) != null) {
                Bin bin = Utils.readBin(line);
                if (bin == null) {
                    continue;
                }
                for (String s : bin.symbols) {
                    symbolToClass.put(s, bin.index + 1);
                }
            }
        }
        return symbolToClass;
    }
}