package hip.ch8;
import org.apache.hadoop.mapred.JobHistory;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.TimeUnit;
public final class DataSkewGnuplot {
public static void main(String... args) throws Exception {
try {
dumpTaskTimes(args);
} catch (Throwable t) {
t.printStackTrace();
}
}
public static void dumpTaskTimes(String... args)
throws Exception {
JobHistory.JobInfo job = JobHistoryHelper.getJobInfoFromCliArgs(args);
List<TaskMetrics> mapMetrics =
JobHistoryHelper.getMapTaskMetrics(job);
List<TaskMetrics> reduceMetrics =
JobHistoryHelper.getReduceTaskMetrics(
job);
System.out.println("# MAP-EXEC-TIME-SECS\tMAP_INPUT_BYTES");
dumpTaskTimes(mapMetrics, new TaskMetrics.ExecTimeComparator());
System.out.println();
System.out.println("# REDUCE-EXEC-TIME-SECS\tREDUCE_INPUT_BYTES");
dumpTaskTimes(reduceMetrics, new TaskMetrics.ExecTimeComparator());
}
public static void dumpTaskTimes(List<TaskMetrics> metrics,
Comparator<TaskMetrics> comparator) {
Collections.sort(metrics, comparator);
for (TaskMetrics m : metrics) {
System.out.println(
TimeUnit.MILLISECONDS.toSeconds(m.getOverallTimeMillis()) +
"\t" +
m.getInputBytes());
}
}
}