package hip.ch8;
import org.apache.hadoop.mapred.JobHistory;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * Command-line utility that reads a job's history and prints, for every
 * second between the job's launch and finish time, how many task attempts
 * were in the map, reduce, shuffle and sort phases.
 *
 * <p>One line per task attempt (attempt id, task type, task status) is
 * written to standard output; the tab-separated timeline is written to
 * standard error.
 */
public final class ExtractJobTaskTimeline {

  /** Distance between two timeline samples, in milliseconds (one second). */
  private static final long STEP_MILLIS = 1000;

  /**
   * Program entry point; delegates to {@link #dumpTaskTimes(String...)}.
   *
   * <p>Failures are allowed to propagate out of {@code main} instead of
   * being caught and printed, so the JVM still reports the stack trace but
   * the process also terminates with a non-zero exit status that scripts
   * can detect.
   *
   * @param args command-line arguments, passed through unchanged
   * @throws Exception if the timeline cannot be produced
   */
  public static void main(String... args) throws Exception {
    dumpTaskTimes(args);
  }

  /**
   * Prints the task attempts of a job and the per-second phase timeline.
   *
   * @param args arguments handed to
   *     {@code JobHistoryHelper.getJobInfoFromCliArgs} to locate the job
   * @throws IllegalStateException if the job history carries no usable
   *     launch time
   * @throws Exception if the job history cannot be loaded
   */
  public static void dumpTaskTimes(String... args)
      throws Exception {
    JobHistory.JobInfo job = JobHistoryHelper.getJobInfoFromCliArgs(args);

    long startTime = job.getLong(JobHistory.Keys.LAUNCH_TIME);
    long endTime = job.getLong(JobHistory.Keys.FINISH_TIME);

    List<TimeRange> mapRanges = new ArrayList<TimeRange>();
    List<TimeRange> reduceRanges = new ArrayList<TimeRange>();
    List<TimeRange> shuffleRanges = new ArrayList<TimeRange>();
    List<TimeRange> sortRanges = new ArrayList<TimeRange>();

    Map<String, JobHistory.Task> tasks = job.getAllTasks();
    for (JobHistory.Task task : tasks.values()) {
      // Type and status are read from the task, not from the attempt, so
      // they are the same for every attempt of this task.
      // NOTE(review): the status printed per attempt is therefore the
      // task-level status - confirm that is what is wanted.
      String taskType = task.get(JobHistory.Keys.TASK_TYPE);
      String taskStatus = task.get(JobHistory.Keys.TASK_STATUS);
      boolean isMap = JobHistory.Values.MAP.name().equals(taskType);
      boolean isReduce = JobHistory.Values.REDUCE.name().equals(taskType);

      for (JobHistory.TaskAttempt attempt : task.getTaskAttempts()
          .values()) {
        String taskId = attempt.get(JobHistory.Keys.TASK_ATTEMPT_ID);
        System.out.println(taskId + " " + taskType + " " + taskStatus);

        long taskStartTime =
            attempt.getLong(JobHistory.Keys.START_TIME);
        long taskEndTime =
            attempt.getLong(JobHistory.Keys.FINISH_TIME);

        if (isMap) {
          addRange(mapRanges, taskStartTime, taskEndTime);
        } else if (isReduce) {
          long shuffleEndTime =
              attempt.getLong(JobHistory.Keys.SHUFFLE_FINISHED);
          long sortEndTime =
              attempt.getLong(JobHistory.Keys.SORT_FINISHED);

          // A reduce attempt is split into three consecutive phases:
          // start -> shuffle finished -> sort finished -> attempt finished.
          addRange(shuffleRanges, taskStartTime, shuffleEndTime);
          addRange(sortRanges, shuffleEndTime, sortEndTime);
          addRange(reduceRanges, sortEndTime, taskEndTime);
        }
        // Attempts of any other task type are listed above but do not
        // contribute to the timeline.
      }
    }

    // Without a launch time the loop below would start at the epoch and
    // emit one row per second up to the finish time.
    if (startTime <= 0) {
      throw new IllegalStateException(
          "Job history has no usable LAUNCH_TIME (got " + startTime
              + "); cannot build a timeline");
    }

    // Output the data, tab-separated, in the following order:
    // time-offset #-map-tasks #-reduce-tasks #-shuffle-tasks #-sort-tasks
    // in steps of 1 second.
    StringBuilder sb = new StringBuilder();
    sb.append("time")
        .append("\tmap")
        .append("\treduce")
        .append("\tshuffle")
        .append("\tsort");
    System.err.println(sb);

    // timeOffset is the number of steps (seconds) since the job launched.
    int timeOffset = 0;
    for (long i = startTime; i <= endTime; i += STEP_MILLIS) {
      sb = new StringBuilder();
      sb.append(timeOffset)
          .append("\t").append(countRangesForTime(mapRanges, i))
          .append("\t").append(countRangesForTime(reduceRanges, i))
          .append("\t").append(countRangesForTime(shuffleRanges, i))
          .append("\t").append(countRangesForTime(sortRanges, i));
      System.err.println(sb);
      timeOffset++;
    }
  }

  /**
   * Appends the range {@code [start, end]} (milliseconds) to
   * {@code ranges}, unless the range is unusable.
   *
   * <p>A range is skipped when its start is not a positive timestamp or
   * when its end lies before its start. This presumably happens when an
   * attempt never recorded one of its phase timestamps and the history
   * lookup yields 0 for the missing key (as Hadoop 1.x's
   * {@code JobHistory.KeyValuePair.getLong} does - confirm for the Hadoop
   * version in use). Without this check an attempt lacking
   * {@code SORT_FINISHED} would produce a reduce range starting at 0 and
   * be counted as reducing from the very first timeline row.
   */
  private static void addRange(List<TimeRange> ranges, long start,
      long end) {
    if (start > 0 && end >= start) {
      ranges.add(new TimeRange(TimeUnit.MILLISECONDS, start, end));
    }
  }

  /**
   * Counts how many of the given ranges contain a point in time.
   *
   * @param ranges the ranges to test
   * @param time the point in time, in milliseconds
   * @return the number of ranges for which
   *     {@link TimeRange#inRange(TimeUnit, long)} is true
   */
  public static int countRangesForTime(List<TimeRange> ranges,
      long time) {
    int count = 0;
    for (TimeRange range : ranges) {
      if (range.inRange(TimeUnit.MILLISECONDS, time)) {
        count++;
      }
    }
    return count;
  }

  /**
   * An immutable time interval, stored in milliseconds, whose start and
   * end are both inclusive.
   */
  public static class TimeRange {
    /** Inclusive start of the interval, in milliseconds. */
    final long startTimeMillis;
    /** Inclusive end of the interval, in milliseconds. */
    final long endTimeMillis;

    /**
     * Creates a range from two values expressed in {@code unit}.
     *
     * @param unit the unit of {@code start} and {@code end}
     * @param start the inclusive start of the range
     * @param end the inclusive end of the range
     */
    public TimeRange(TimeUnit unit, long start, long end) {
      startTimeMillis = unit.toMillis(start);
      endTimeMillis = unit.toMillis(end);
    }

    /**
     * Tells whether a point in time lies within this range.
     *
     * <p>Both bounds are inclusive, so two ranges that share a boundary
     * value both contain that value.
     *
     * @param unit the unit of {@code value}
     * @param value the point in time to test
     * @return {@code true} if the value, converted to milliseconds, is
     *     between the start and end of this range (inclusive)
     */
    public boolean inRange(TimeUnit unit, long value) {
      long millis = unit.toMillis(value);
      return millis >= startTimeMillis && millis <= endTimeMillis;
    }
  }
}