package hip.ch8;

import org.apache.commons.lang.reflect.MethodUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.DefaultJobHistoryParser;
import org.apache.hadoop.mapred.JobHistory;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import static org.apache.hadoop.mapred.Task.Counter.*;

/**
 * Static helpers that load a Hadoop job history file into a
 * {@link JobHistory.JobInfo} and turn its task attempts into
 * {@link TaskMetrics} records.
 */
public final class JobHistoryHelper {

  private static final long MILLIS_PER_SECOND = 1000L;
  private static final long MILLIS_PER_MINUTE = 60L * MILLIS_PER_SECOND;
  private static final long MILLIS_PER_HOUR = 60L * MILLIS_PER_MINUTE;

  /**
   * Loads job history using a default {@link Configuration}.
   *
   * @param args either {@code --hdfsdir <dir>} or {@code --localfile <path>}
   * @return the parsed job history
   * @throws IOException if the arguments are invalid or the history cannot
   *                     be read
   * @see #getJobInfoFromCliArgs(Configuration, String...)
   */
  public static JobHistory.JobInfo getJobInfoFromCliArgs(String... args)
      throws IOException {
    return getJobInfoFromCliArgs(new Configuration(), args);
  }

  /**
   * Loads job history from the location described by command-line style
   * arguments.
   *
   * @param conf the Hadoop configuration used to resolve file systems
   * @param args exactly two values: {@code --hdfsdir} followed by a job
   *             output directory, or {@code --localfile} followed by the
   *             path of a history file
   * @return the parsed job history
   * @throws IOException if there are not exactly two arguments, the option
   *                     is not recognised, or the history cannot be read
   */
  public static JobHistory.JobInfo getJobInfoFromCliArgs(Configuration conf,
                                                         String... args)
      throws IOException {
    String usage =
        "Expected 2 arguments, either --hdfsdir <dir> or --localfile <path>";
    if (args.length != 2) {
      throw new IOException(usage);
    }

    String option = args[0];
    String location = args[1];
    if ("--hdfsdir".equals(option)) {
      return getJobInfoFromHdfsOutputDir(location, conf);
    }
    if ("--localfile".equals(option)) {
      return getJobInfoFromLocalFile(location, conf);
    }
    throw new IOException("Unexpected option '" + args[0] + "' \n" + usage);
  }

  /**
   * Accepts every path whose name does not end in {@code .xml}.
   * Presumably this skips the job configuration XML stored next to the
   * history log -- confirm against the history directory layout in use.
   */
  public static PathFilter jobLogFileFilter = new PathFilter() {
    @Override
    public boolean accept(Path path) {
      return !(path.getName().endsWith(".xml"));
    }
  };

  /**
   * Loads job history from the {@code _logs/history} directory beneath a
   * job's output directory. The first file accepted by
   * {@link #jobLogFileFilter} is parsed.
   *
   * @param outputDir the job output directory
   * @param conf      the configuration used to obtain the file system of
   *                  {@code outputDir}
   * @return the parsed job history
   * @throws IOException if the history directory does not exist, contains
   *                     no candidate file, the file name does not yield a
   *                     job ID, or parsing fails
   */
  public static JobHistory.JobInfo getJobInfoFromHdfsOutputDir(
      String outputDir, Configuration conf) throws IOException {
    Path output = new Path(outputDir);
    Path historyLogDir = new Path(output, "_logs/history");
    FileSystem fs = output.getFileSystem(conf);

    // Check the history directory itself: the output directory can exist
    // without any history beneath it, and that is the condition the
    // message below reports.
    if (!fs.exists(historyLogDir)) {
      throw new IOException("History directory " + historyLogDir.toString()
          + " does not exist");
    }

    Path[] jobFiles =
        FileUtil.stat2Paths(fs.listStatus(historyLogDir, jobLogFileFilter));
    // Defensive: treat a missing listing the same way as an empty one.
    if (jobFiles == null || jobFiles.length == 0) {
      throw new IOException("Not a valid history directory "
          + historyLogDir.toString());
    }

    Path historyFile = jobFiles[0];
    JobHistory.JobInfo job =
        new JobHistory.JobInfo(jobIdFromHistoryFileName(historyFile.getName()));
    DefaultJobHistoryParser.parseJobTasks(historyFile.toString(), job, fs);
    return job;
  }

  /**
   * Loads job history from a single history file on the local file system.
   *
   * @param outputFile path of the history file
   * @param conf       the configuration used to obtain the local file system
   * @return the parsed job history
   * @throws IOException if the file name does not yield a job ID or parsing
   *                     fails
   */
  public static JobHistory.JobInfo getJobInfoFromLocalFile(
      String outputFile, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Path outputFilePath = new Path(outputFile);

    JobHistory.JobInfo job = new JobHistory.JobInfo(
        jobIdFromHistoryFileName(outputFilePath.getName()));
    DefaultJobHistoryParser.parseJobTasks(outputFile, job, fs);
    return job;
  }

  /**
   * Builds a job ID from the third, fourth and fifth underscore-separated
   * parts of a decoded history file name.
   */
  private static String jobIdFromHistoryFileName(String historyFileName)
      throws IOException {
    String[] jobDetails =
        JobHistory.JobInfo.decodeJobHistoryFileName(historyFileName)
            .split("_");
    // Fail with a descriptive error instead of an
    // ArrayIndexOutOfBoundsException when the name has too few parts.
    if (jobDetails.length < 5) {
      throw new IOException("Unable to extract a job ID from history file"
          + " name '" + historyFileName + "'");
    }
    return jobDetails[2] + "_" + jobDetails[3] + "_" + jobDetails[4];
  }

  /**
   * Collects metrics for every attempt of every map task in a job.
   *
   * @param job the parsed job history
   * @return one {@link TaskMetrics} per map task attempt
   * @throws ParseException if an attempt's counters cannot be parsed
   */
  public static List<TaskMetrics> getMapTaskMetrics(JobHistory.JobInfo job)
      throws ParseException {
    List<TaskMetrics> metrics = new ArrayList<TaskMetrics>();
    addTask(metrics, job, JobHistory.Values.MAP.name());
    return metrics;
  }

  /**
   * Collects metrics for every attempt of every reduce task in a job.
   *
   * @param job the parsed job history
   * @return one {@link TaskMetrics} per reduce task attempt
   * @throws ParseException if an attempt's counters cannot be parsed
   */
  public static List<TaskMetrics> getReduceTaskMetrics(JobHistory.JobInfo job)
      throws ParseException {
    List<TaskMetrics> metrics = new ArrayList<TaskMetrics>();
    addTask(metrics, job, JobHistory.Values.REDUCE.name());
    return metrics;
  }

  /**
   * Reflectively invokes a no-argument method on a {@link TaskMetrics} and
   * returns its result as a {@code long}.
   *
   * @param m         the metrics object to read from
   * @param fieldName the name of the method to invoke; despite the
   *                  parameter name this is a method name, and the method's
   *                  result must be castable to {@link Long}
   * @return the value returned by the invoked method
   * @throws IllegalAccessException    if the method is not accessible
   * @throws InvocationTargetException if the invoked method throws
   * @throws NoSuchMethodException     if no matching method exists
   */
  public static long extractLongFieldValue(TaskMetrics m, String fieldName)
      throws IllegalAccessException, InvocationTargetException,
      NoSuchMethodException {
    // The cast names the Object[] overload explicitly; overload resolution
    // already picks it for a bare null, so behaviour is unchanged.
    return (Long) MethodUtils.invokeMethod(m, fieldName, (Object[]) null);
  }

  /**
   * Appends one {@link TaskMetrics} to {@code metrics} for every attempt of
   * every task in {@code job} whose task type equals {@code taskType}.
   *
   * <p>Each metric records the attempt ID, host, status, overall time
   * (finish time minus start time) and input/output byte and record
   * counters. Each counter value is the first counter found matching the
   * map-side or reduce-side name, or {@code -1} when neither is present.
   * Shuffle and sort times are also recorded when the task is a reduce
   * task.
   *
   * @param metrics  the list to append to
   * @param job      the parsed job history
   * @param taskType the task type to select, compared against each task's
   *                 {@code TASK_TYPE} value
   * @throws ParseException if an attempt's counters cannot be parsed
   */
  public static void addTask(List<TaskMetrics> metrics,
                             JobHistory.JobInfo job,
                             String taskType) throws ParseException {
    Map<String, JobHistory.Task> tasks = job.getAllTasks();
    for (JobHistory.Task task : tasks.values()) {
      // The task type is the same for every attempt of a task, so it is
      // checked once per task rather than once per attempt.
      if (!taskType.equals(task.get(JobHistory.Keys.TASK_TYPE))) {
        continue;
      }
      boolean reduceTask = JobHistory.Values.REDUCE.name()
          .equals(task.get(JobHistory.Keys.TASK_TYPE));

      for (JobHistory.TaskAttempt attempt
          : task.getTaskAttempts().values()) {
        TaskMetrics metric = new TaskMetrics();
        metrics.add(metric);

        metric.setType(taskType)
            .setTaskId(attempt.get(JobHistory.Keys.TASK_ATTEMPT_ID))
            .setHost(attempt.get(JobHistory.Keys.HOSTNAME))
            .setStatus(attempt.get(JobHistory.Keys.TASK_STATUS));

        long startTime = attempt.getLong(JobHistory.Keys.START_TIME);
        long taskOverallTime =
            attempt.getLong(JobHistory.Keys.FINISH_TIME) - startTime;
        metric.setOverallTimeMillis(taskOverallTime);

        String counters = attempt.get(JobHistory.Keys.COUNTERS);
        metric.setInputBytes(extractNumericCounter(counters,
            MAP_INPUT_BYTES.name(), REDUCE_SHUFFLE_BYTES.name()));
        metric.setOutputBytes(extractNumericCounter(counters,
            MAP_OUTPUT_BYTES.name(), "HDFS_BYTES_WRITTEN"));
        metric.setInputRecords(extractNumericCounter(counters,
            MAP_INPUT_RECORDS.name(), REDUCE_INPUT_RECORDS.name()));
        metric.setOutputRecords(extractNumericCounter(counters,
            MAP_OUTPUT_RECORDS.name(), REDUCE_OUTPUT_RECORDS.name()));

        if (reduceTask) {
          long shuffleFinished =
              attempt.getLong(JobHistory.Keys.SHUFFLE_FINISHED);
          long shuffleTime = shuffleFinished - startTime;
          long sortTime =
              attempt.getLong(JobHistory.Keys.SORT_FINISHED) - shuffleFinished;
          metric.setShuffleTimeMillis(shuffleTime);
          metric.setSortTimeMillis(sortTime);
        }
      }
    }
  }

  /**
   * Returns the numeric value of the first counter matching any of the
   * given names.
   *
   * @param counterFromHist the counters string taken from a history record
   * @param counterNames    the counter names to look for
   * @return the counter value, or {@code -1} if no counter matches
   * @throws ParseException if {@code counterFromHist} cannot be parsed
   */
  public static long extractNumericCounter(String counterFromHist,
                                           String... counterNames)
      throws ParseException {
    String value = extractCounter(counterFromHist, counterNames);
    return value == null ? -1 : Long.parseLong(value);
  }

  /**
   * Returns, as a string, the value of the first counter matching any of
   * the given names. Counters are visited group by group in the iteration
   * order of the parsed {@link Counters}; the first counter whose name
   * equals any entry of {@code counterNames} wins.
   *
   * @param counterFromHist the counters string taken from a history record
   * @param counterNames    the counter names to look for
   * @return the matching counter's value, or {@code null} if none matches
   * @throws ParseException if {@code counterFromHist} cannot be parsed
   */
  public static String extractCounter(String counterFromHist,
                                      String... counterNames)
      throws ParseException {
    Counters counters = Counters.fromEscapedCompactString(counterFromHist);
    for (Counters.Group group : counters) {
      for (Counters.Counter counter : group) {
        String name = counter.getName();
        for (String counterName : counterNames) {
          if (counterName.equals(name)) {
            return String.valueOf(counter.getCounter());
          }
        }
      }
    }
    return null;
  }

  /**
   * Formats a duration as hours, minutes and seconds, for example
   * {@code "1h 1m 1s"}, {@code "1m 1s"} or {@code "1s"}.
   *
   * <p>Hours are omitted when zero. Minutes are omitted only when both
   * hours and minutes are zero. Seconds are always present. Any remainder
   * below one second is discarded.
   *
   * @param timeDiffMillis the duration in milliseconds
   * @return the formatted duration
   */
  public static String formatTime(long timeDiffMillis) {
    long hours = timeDiffMillis / MILLIS_PER_HOUR;
    long afterHours = timeDiffMillis % MILLIS_PER_HOUR;
    long minutes = afterHours / MILLIS_PER_MINUTE;
    long seconds = (afterHours % MILLIS_PER_MINUTE) / MILLIS_PER_SECOND;

    StringBuilder buf = new StringBuilder();
    if (hours != 0) {
      buf.append(hours).append("h");
    }
    if (hours != 0 || minutes != 0) {
      if (buf.length() > 0) {
        buf.append(" ");
      }
      buf.append(minutes).append("m");
    }
    if (buf.length() > 0) {
      buf.append(" ");
    }
    buf.append(seconds).append("s");
    return buf.toString();
  }
}