/* * Copyright 2012 LinkedIn, Inc * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.linkedin.whiteelephant.parsing; import java.util.HashMap; import java.util.regex.Matcher; import java.util.regex.Pattern; import com.linkedin.whiteelephant.parsing.Attempt; import com.linkedin.whiteelephant.parsing.DerivedAttemptData; import com.linkedin.whiteelephant.parsing.Job; import com.linkedin.whiteelephant.parsing.JobStatus; import com.linkedin.whiteelephant.parsing.Task; import com.linkedin.whiteelephant.parsing.TaskStatus; import com.linkedin.whiteelephant.parsing.TaskType; public class LineParsing { public enum AttemptParameter { TASKID, TASK_ATTEMPT_ID, TASK_STATUS, START_TIME, FINISH_TIME, SHUFFLE_FINISHED, SORT_FINISHED } private static String quotedTextPattern = "\"([^\"]+)\""; private static Pattern jobLinePattern = Pattern.compile(String.format("^Job JOBID=%s.*",quotedTextPattern)); private static Pattern jobPattern = Pattern.compile("job_\\d+_\\d+"); private static Pattern parameterPattern = Pattern.compile("([A-Z_]+)=" + quotedTextPattern); private static Pattern counterPattern = Pattern.compile("\\[\\(([A-Z_]+)\\)\\(.+?\\)\\((\\d+)\\)\\]"); private static Pattern taskPattern = Pattern.compile("task_(\\d+_\\d+)_[mr]_\\d+"); private static Pattern taskLinePattern = Pattern.compile(String.format("Task TASKID=%s TASK_TYPE=\"(MAP|REDUCE)\".+",quotedTextPattern)); private static Pattern attemptLinePattern = Pattern.compile("^(Map|Reduce)Attempt TASK_TYPE=\"(MAP|REDUCE)\".+"); public static Job tryParseJob(String line) { // these mess with our pattern matching line = line.replace("\\\"", ""); Job job = null; Matcher m = jobLinePattern.matcher(line); if (m.matches()) { job = new Job(); job.setJobId(m.group(1)); Matcher paramMatcher = parameterPattern.matcher(line); while (paramMatcher.find()) { String name = paramMatcher.group(1); String value = paramMatcher.group(2); maybeSetJobParam(job, name, value); } } else if (line.indexOf("USER=") >= 0) { Matcher jobMatcher = jobPattern.matcher(line); if (jobMatcher.find()) { String jobId = jobMatcher.group(); job = new Job(); job.setJobId(jobId); Matcher paramMatcher = parameterPattern.matcher(line); while (paramMatcher.find()) { String name = paramMatcher.group(1); String value = paramMatcher.group(2); maybeSetJobParam(job, name, value); } } } return job; } private static void maybeSetJobParam(Job job, String name, String value) { if (name.equals("USER")) { job.setUser(value); } else if (name.equals("JOBNAME")) { job.setJobName(value); } else if (name.equals("JOB_QUEUE")) { job.setJobQueue(value); } else if (name.equals("JOB_STATUS")) { if (value.equals("SUCCESS") || value.equals("FAILURE")) { job.setJobStatus(JobStatus.valueOf(value)); } } else if (name.equals("SUBMIT_TIME")) { job.setSubmitTime(Long.parseLong(value)); } else if (name.equals("LAUNCH_TIME")) { job.setLaunchTime(Long.parseLong(value)); } else if (name.equals("FINISH_TIME")) { job.setFinishTime(Long.parseLong(value)); } else if (name.equals("TOTAL_MAPS")) { job.setTotalMaps(Integer.parseInt(value)); } else if (name.equals("TOTAL_REDUCES")) { job.setTotalReduces(Integer.parseInt(value)); } else if (name.equals("FINISHED_MAPS")) { job.setFinishedMaps(Integer.parseInt(value)); } else if (name.equals("FINISHED_REDUCES")) { job.setFinishedReduces(Integer.parseInt(value)); } else if (name.equals("FAILED_MAPS")) { job.setFailedMaps(Integer.parseInt(value)); } else if (name.equals("FAILED_REDUCES")) { job.setFailedReduces(Integer.parseInt(value)); } } public static Attempt tryParseAttempt(String line) { // these mess with our pattern matching line = line.replace("\\\"", ""); Attempt attempt = null; Matcher m = attemptLinePattern.matcher(line); if (m.matches()) { attempt = new Attempt(); attempt.setCounters(new HashMap<CharSequence,Long>()); attempt.setDerived(new DerivedAttemptData()); attempt.setType(TaskType.valueOf(m.group(1).toUpperCase())); Matcher matcher = parameterPattern.matcher(line); while (matcher.find()) { String name = matcher.group(1); String value = matcher.group(2); maybeSetAttemptParam(attempt,name,value); } matcher = counterPattern.matcher(line); while (matcher.find()) { String name = matcher.group(1); String value = matcher.group(2); setAttemptCounter(attempt,name,Long.parseLong(value)); } if (attempt.getTaskId() != null) { Matcher taskMatcher = taskPattern.matcher(attempt.getTaskId()); if (taskMatcher.matches()) { String jobId = String.format("job_%s",taskMatcher.group(1)); attempt.setJobId(jobId); } else { System.out.println("Could not match task ID for " + attempt.getTaskId()); System.out.println("line: " + line); } } else { System.out.println("Could not find task ID"); System.out.println("line: " + line); attempt = null; } } return attempt; } private static void maybeSetAttemptParam(Attempt attempt, String name, String value) { try { AttemptParameter param = AttemptParameter.valueOf(name); if (param.equals(AttemptParameter.TASKID)) { attempt.setTaskId(value); } else if (param.equals(AttemptParameter.TASK_ATTEMPT_ID)) { attempt.setTaskAttemptId(value); } else if (param.equals(AttemptParameter.TASK_STATUS)) { attempt.setTaskStatus(TaskStatus.valueOf(value)); } else if (param.equals(AttemptParameter.START_TIME)) { attempt.setStartTime(Long.parseLong(value)); } else if (param.equals(AttemptParameter.FINISH_TIME)) { attempt.setFinishTime(Long.parseLong(value)); } else if (param.equals(AttemptParameter.SHUFFLE_FINISHED)) { attempt.setShuffleFinished(Long.parseLong(value)); } else if (param.equals(AttemptParameter.SORT_FINISHED)) { attempt.setSortFinished(Long.parseLong(value)); } } catch (IllegalArgumentException e) { // ignore these, it means the enum isn't one we care about } } private static void setAttemptCounter(Attempt attempt, String name, Long value) { attempt.getCounters().put(name, value); } public static Task tryParseTask(String line) { // these mess with our pattern matching line = line.replace("\\\"", ""); Task task = null; Matcher m = taskLinePattern.matcher(line); if (m.matches()) { task = new Task(); task.setType(TaskType.valueOf(m.group(2).toUpperCase())); Matcher matcher = parameterPattern.matcher(line); while (matcher.find()) { String name = matcher.group(1); String value = matcher.group(2); maybeSetTaskParam(task,name,value); } if (task.getTaskId() != null) { Matcher taskMatcher = taskPattern.matcher(task.getTaskId()); if (taskMatcher.matches()) { String jobId = String.format("job_%s",taskMatcher.group(1)); task.setJobId(jobId); } else { System.out.println("Could not match task ID for " + task.getTaskId()); System.out.println("line: " + line); } } else { System.out.println("Could not find task ID"); System.out.println("line: " + line); task = null; } } return task; } private static void maybeSetTaskParam(Task attempt, String name, String value) { if (name.equals("TASKID")) { attempt.setTaskId(value); } else if (name.equals("TASK_STATUS")) { if (value.equals("SUCCESS") || value.equals("FAILURE")) { attempt.setTaskStatus(TaskStatus.valueOf(value)); } } else if (name.equals("START_TIME")) { attempt.setStartTime(Long.parseLong(value)); } else if (name.equals("FINISH_TIME")) { attempt.setFinishTime(Long.parseLong(value)); } } }