package com.linkedin.camus.etl.kafka.reporter;
import java.util.Map;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskReport;
import com.linkedin.camus.etl.kafka.reporter.BaseReporter;
public class TimeReporter extends BaseReporter {

  /**
   * Logs a human-readable timing report for a finished Camus job: per-phase wall-clock
   * times with percentages, map-task wait/run statistics, a per-counter time breakdown
   * from the "total" counter group, and total megabytes read.
   *
   * @param job the completed Hadoop job whose task reports and counters are inspected
   * @param timingMap phase name to elapsed milliseconds; expected keys: "pre-setup",
   *     "getSplits", "hadoop", "commit", "total", "hadoop_start"
   *     (NOTE(review): a missing key would NPE on unboxing — assumed always populated by the caller)
   * @throws IOException if task reports or counters cannot be fetched from the cluster
   */
  public void report(Job job, Map<String, Long> timingMap) throws IOException {
    // Cache one percent formatter instead of re-creating it for every output line.
    NumberFormat percent = NumberFormat.getPercentInstance();

    StringBuilder sb = new StringBuilder();
    sb.append("***********Timing Report*************\n");
    sb.append("Job time (seconds):\n");

    // Divide by 1000d, not 1000: the map holds millisecond longs, and integer
    // division would truncate fractional seconds before the widening to double,
    // defeating the %6.1f formatting below.
    double preSetup = timingMap.get("pre-setup") / 1000d;
    double getSplits = timingMap.get("getSplits") / 1000d;
    double hadoop = timingMap.get("hadoop") / 1000d;
    double commit = timingMap.get("commit") / 1000d;
    double total = timingMap.get("total") / 1000d;

    sb.append(String.format(" %12s %6.1f (%s)\n", "pre setup", preSetup, percent.format(preSetup / total)));
    sb.append(String.format(" %12s %6.1f (%s)\n", "get splits", getSplits, percent.format(getSplits / total)));
    sb.append(String.format(" %12s %6.1f (%s)\n", "hadoop job", hadoop, percent.format(hadoop / total)));
    sb.append(String.format(" %12s %6.1f (%s)\n", "commit", commit, percent.format(commit / total)));

    int minutes = (int) total / 60;
    int seconds = (int) total % 60;
    sb.append(String.format("Total: %d minutes %d seconds\n", minutes, seconds));

    // JobClient holds cluster connections; close it even if report generation fails.
    JobClient client = new JobClient(new JobConf(job.getConfiguration()));
    try {
      TaskReport[] tasks = client.getMapTaskReports(JobID.downgrade(job.getJobID()));

      double min = Long.MAX_VALUE, max = 0, mean = 0;
      double minRun = Long.MAX_VALUE, maxRun = 0, meanRun = 0;
      long totalTaskTime = 0;
      for (TaskReport t : tasks) {
        // Wait = how long the task sat queued after the Hadoop phase started.
        long wait = t.getStartTime() - timingMap.get("hadoop_start");
        min = Math.min(min, wait);
        max = Math.max(max, wait);
        mean += wait;

        long runTime = t.getFinishTime() - t.getStartTime();
        totalTaskTime += runTime;
        minRun = Math.min(minRun, runTime);
        maxRun = Math.max(maxRun, runTime);
        meanRun += runTime;
      }
      mean /= tasks.length;
      meanRun /= tasks.length;

      // Convert all task statistics from milliseconds to seconds.
      min /= 1000;
      max /= 1000;
      mean /= 1000;
      minRun /= 1000;
      maxRun /= 1000;
      meanRun /= 1000;

      sb.append("\nHadoop job task times (seconds):\n");
      sb.append(String.format(" %12s %6.1f\n", "min", minRun));
      sb.append(String.format(" %12s %6.1f\n", "mean", meanRun));
      sb.append(String.format(" %12s %6.1f\n", "max", maxRun));
      sb.append(String.format(" %12s %6.1f/%.1f = %.2f\n", "skew", meanRun, maxRun, meanRun / maxRun));

      sb.append("\nTask wait time (seconds):\n");
      sb.append(String.format(" %12s %6.1f\n", "min", min));
      sb.append(String.format(" %12s %6.1f\n", "mean", mean));
      sb.append(String.format(" %12s %6.1f\n", "max", max));

      // Break total task time down by the per-phase counters the mappers publish.
      CounterGroup totalGrp = job.getCounters().getGroup("total");
      long decode = totalGrp.findCounter("decode-time(ms)").getValue();
      long request = totalGrp.findCounter("request-time(ms)").getValue();
      long map = totalGrp.findCounter("mapper-time(ms)").getValue();
      long mb = totalGrp.findCounter("data-read").getValue();
      long other = totalTaskTime - map - request - decode;

      sb.append("\nHadoop task breakdown:\n");
      sb.append(String.format(" %12s %s\n", "kafka", percent.format(request / (double) totalTaskTime)));
      sb.append(String.format(" %12s %s\n", "decode", percent.format(decode / (double) totalTaskTime)));
      sb.append(String.format(" %12s %s\n", "map output", percent.format(map / (double) totalTaskTime)));
      sb.append(String.format(" %12s %s\n", "other", percent.format(other / (double) totalTaskTime)));

      sb.append(String.format("\n%16s %s\n", "Total MB read:", mb / 1024 / 1024));

      log.info(sb.toString());
    } finally {
      client.close();
    }
  }
}