/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
import org.apache.hadoop.util.StringUtils;

/**
 * A {@link UtilizationGauger} for Linux hosts.
 *
 * <p>Machine totals are read from <code>/proc/meminfo</code> and
 * <code>/proc/cpuinfo</code>; current usage is sampled by running
 * <code>ps -eo pid,ppid,pcpu,rss,command</code> and walking the resulting
 * process tree. Results are stored in the <code>ttUtilization</code> and
 * <code>localJobUtilization</code> members, which are declared outside this
 * file (presumably in {@link UtilizationGauger}).
 */
public class LinuxUtilizationGauger extends UtilizationGauger {

  // pcpu: cpu utilization percentage of one core in "##.#" format.
  // rss:  resident set size, the non-swapped physical memory that a
  //       task has used (in kiloBytes).
  static private final String[] CMD =
      {"ps", "-eo", "pid,ppid,pcpu,rss,command"};
  static private final int NUM_FIELDS = 5;
  // Column indices of one parsed ps row; they follow the order in CMD.
  static private final int PID = 0, PPID = 1, PCPU = 2, RSS = 3, COMMAND = 4;
  static private final Pattern psPattern = Pattern.compile(
      "([0-9]+)[ \t]+([0-9]+)[ \t]+([0-9.]+)[ \t]+([0-9.]+)[ \t]+(.*)");
  static private final String MEM_INFO = "/proc/meminfo";
  static private final String CPU_INFO = "/proc/cpuinfo";

  // Compiled once instead of on every call.
  static private final Pattern MEM_TOTAL_PATTERN =
      Pattern.compile("MemTotal:[ \t]+([0-9]+)[ \t]+kB");
  static private final Pattern JOB_ID_PATTERN =
      Pattern.compile("(job_[0-9]+_[0-9]+)");
  static private final Pattern TASK_TRACKER_PATTERN =
      Pattern.compile(".*TaskTracker.*");

  // Key under which usage not attributable to a job is reported.
  static private final String TASK_TRACKER_KEY = "TaskTracker";
  // Divisor used throughout this class to turn kB into GB.
  static private final double KB_PER_GB = 1000000d;

  public static final Log LOG =
      LogFactory.getLog("org.apache.hadoop.mapred.resourceutilization");

  /**
   * Records the machine totals (memory, CPU count, CPU GHz) and the host name
   * in <code>ttUtilization</code>.
   *
   * <p>An {@link IOException} from any step is logged and the remaining steps
   * are skipped; it is not rethrown.
   */
  @Override
  public void initialGauge() {
    try {
      parseMemInfo(readFile(MEM_INFO));
      parseCpuInfo(readFile(CPU_INFO));
      ttUtilization.setHostName(InetAddress.getLocalHost().getHostName());
    } catch (IOException e) {
      LOG.error(StringUtils.stringifyException(e));
    }
  }

  /**
   * Read a file line by line.
   *
   * @param fileName path of the file to read
   * @return the lines of the file, in order, without line terminators
   * @throws IOException if the file cannot be opened or read
   */
  private String[] readFile(String fileName) throws IOException {
    List<String> lines = new ArrayList<String>();
    BufferedReader reader = new BufferedReader(new FileReader(fileName));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        lines.add(line);
      }
    } finally {
      // Closing the BufferedReader also closes the wrapped FileReader, and
      // doing it in finally releases the file even when readLine() fails.
      reader.close();
    }
    return lines.toArray(new String[lines.size()]);
  }

  /**
   * Extracts the total memory from the contents of /proc/meminfo and stores
   * it, in GB, in <code>ttUtilization</code>. The first line containing a
   * <code>MemTotal:</code> entry wins; if there is none, 0 is stored.
   *
   * @param memInfoFile lines of /proc/meminfo
   * @throws IOException declared for callers and overriders; not thrown by
   *         this implementation
   */
  protected void parseMemInfo(String[] memInfoFile) throws IOException {
    double memTotalGB = 0;
    for (String line : memInfoFile) {
      Matcher matcher = MEM_TOTAL_PATTERN.matcher(line);
      if (matcher.find()) {
        memTotalGB = Double.parseDouble(matcher.group(1)) / KB_PER_GB;
        break;
      }
    }
    ttUtilization.setMemTotalGB(memTotalGB);
  }

  /**
   * Extracts the CPU count and the summed clock speed from the contents of
   * /proc/cpuinfo and stores them in <code>ttUtilization</code>.
   *
   * <p>Every line starting with <code>processor</code> counts as one CPU;
   * every line starting with <code>cpu MHz</code> contributes its value
   * (converted to GHz) to the total. A <code>cpu MHz</code> line without a
   * parsable value is logged and ignored.
   *
   * @param cpuInfoFile lines of /proc/cpuinfo
   * @throws IOException declared for callers and overriders; not thrown by
   *         this implementation
   */
  protected void parseCpuInfo(String[] cpuInfoFile) throws IOException {
    int numCpu = 0;
    double cpuTotalGHz = 0;
    for (String line : cpuInfoFile) {
      if (line.startsWith("processor")) {
        numCpu++;
      }
      if (line.startsWith("cpu MHz")) {
        String[] keyValue = line.split(":");
        if (keyValue.length < 2) {
          LOG.warn("Ignoring malformed cpuinfo line: " + line);
          continue;
        }
        try {
          cpuTotalGHz += Double.parseDouble(keyValue[1].trim()) / 1000d;
        } catch (NumberFormatException e) {
          LOG.warn("Ignoring malformed cpuinfo line: " + line);
        }
      }
    }
    ttUtilization.setNumCpu(numCpu);
    ttUtilization.setCpuTotalGHz(cpuTotalGHz);
  }

  /**
   * Execute "ps -eo pid,ppid,pcpu,rss,command".
   *
   * @return the output split into lines, or <code>null</code> if running the
   *         command failed (the failure is logged)
   */
  protected String[] getPS() {
    ShellCommandExecutor shellExecutor = new ShellCommandExecutor(CMD);
    try {
      shellExecutor.execute();
    } catch (IOException e) {
      LOG.error(StringUtils.stringifyException(e));
      return null;
    }
    return shellExecutor.getOutput().split("\n");
  }

  /**
   * Parse PS results into fields.
   *
   * <p>The first line is skipped as the column header. Lines that do not
   * match the expected layout are dropped, so every returned row has all
   * {@link #NUM_FIELDS} fields non-null.
   *
   * @param psStrings lines produced by {@link #getPS()}; may be
   *        <code>null</code>
   * @return one row of fields per parsable process line; empty when
   *         <code>psStrings</code> is <code>null</code>
   */
  private String[][] parsePS(String[] psStrings) {
    if (psStrings == null) {
      return new String[0][];
    }
    List<String[]> rows = new ArrayList<String[]>();
    for (int i = 1; i < psStrings.length; i++) {
      Matcher matcher = psPattern.matcher(psStrings[i]);
      if (!matcher.find()) {
        continue;
      }
      String[] fields = new String[NUM_FIELDS];
      for (int j = 0; j < NUM_FIELDS; j++) {
        fields[j] = matcher.group(j + 1);
      }
      rows.add(fields);
    }
    return rows.toArray(new String[rows.size()][]);
  }

  /**
   * Parses a numeric ps field, treating a missing or malformed value as 0.
   *
   * @param value text of the field; may be <code>null</code>
   * @return the parsed value, or 0 if it cannot be parsed
   */
  private static double parseDoubleOrZero(String value) {
    if (value == null) {
      // Double.parseDouble(null) throws NullPointerException, not
      // NumberFormatException, so it has to be checked separately.
      return 0d;
    }
    try {
      return Double.parseDouble(value);
    } catch (NumberFormatException e) {
      return 0d;
    }
  }

  /**
   * Converts a "ps -eo pcpu" value to GHz. pcpu is a percentage of one core,
   * so it is scaled by the average clock speed of a core.
   *
   * @param cpuUsage per-core CPU percentage (may exceed 100 when summed)
   * @return the usage in GHz, or 0 when the CPU count is not known
   */
  private double percentageToGHz(double cpuUsage) {
    double numCpu = ttUtilization.getNumCpu();
    if (numCpu <= 0) {
      // CPU info was never parsed successfully; avoid dividing by zero and
      // reporting NaN/Infinity.
      return 0d;
    }
    cpuUsage /= 100;
    cpuUsage /= numCpu;
    cpuUsage *= ttUtilization.getCpuTotalGHz();
    return cpuUsage;
  }

  /**
   * Samples the current CPU and memory usage of the machine and of each job.
   *
   * <p>Machine-wide usage is the sum over all processes reported by ps.
   * Per-job usage is gathered from the process trees below every process
   * whose parent is pid 1 and whose command contains "TaskTracker": each
   * direct child subtree is charged to the job id found in the child's
   * command, or to the "TaskTracker" entry when no job id is present. When
   * no such process exists, <code>localJobUtilization</code> is set to
   * <code>null</code>.
   *
   * <p>If ps could not be run, this sample is skipped and the previously
   * stored values are left untouched.
   */
  @Override
  public void gauge() {
    String[] psOutput = getPS();
    if (psOutput == null) {
      // getPS() has already logged the failure.
      return;
    }
    String[][] psResult = parsePS(psOutput);

    // Get the overall CPU and memory usage
    double cpuUsage = 0d;
    double memUsage = 0d;
    for (String[] psFields : psResult) {
      cpuUsage += parseDoubleOrZero(psFields[PCPU]);
      memUsage += parseDoubleOrZero(psFields[RSS]);
    }
    // "ps -eo pcpu" gives % per core. We convert it to GHz.
    cpuUsage = percentageToGHz(cpuUsage);
    // "ps -eo rss" gives memory in kB. We convert it to GB.
    memUsage /= KB_PER_GB;
    ttUtilization.setMemUsageGB(memUsage);
    ttUtilization.setCpuUsageGHz(cpuUsage);

    // Index the results of PS by the pid
    Map<String, String[]> pidToContent = new HashMap<String, String[]>();
    for (String[] psFields : psResult) {
      pidToContent.put(psFields[PID], psFields);
    }

    // Obtain all child processes of every process. Every pid gets an entry
    // (possibly empty) so later lookups never return null.
    Map<String, LinkedList<String>> pidToChildPid =
        new HashMap<String, LinkedList<String>>();
    for (String[] psFields : psResult) {
      if (!pidToChildPid.containsKey(psFields[PID])) {
        pidToChildPid.put(psFields[PID], new LinkedList<String>());
      }
      if (!pidToChildPid.containsKey(psFields[PPID])) {
        pidToChildPid.put(psFields[PPID], new LinkedList<String>());
      }
      pidToChildPid.get(psFields[PPID]).add(psFields[PID]);
    }

    // There can be multiple TaskTracker on one machine.
    // taskTrackerUsage[0] is CPU in GHz, taskTrackerUsage[1] is memory in GB.
    List<String> taskTrackerPidList = new LinkedList<String>();
    double[] taskTrackerUsage = new double[2];
    for (String[] psFields : psResult) {
      if (!psFields[PPID].equals("1")) {
        continue;
      }
      if (!TASK_TRACKER_PATTERN.matcher(psFields[COMMAND]).matches()) {
        continue;
      }
      taskTrackerPidList.add(psFields[PID]);
      taskTrackerUsage[0] +=
          percentageToGHz(parseDoubleOrZero(psFields[PCPU]));
      taskTrackerUsage[1] += parseDoubleOrZero(psFields[RSS]) / KB_PER_GB;
    }
    if (taskTrackerPidList.isEmpty()) {
      localJobUtilization = null;
      return;
    }

    // Obtain all jobID. Every job id seen in any command line gets an entry,
    // so the attribution loop below can look ids up without a null check.
    Map<String, double[]> jobIdToUsage = new HashMap<String, double[]>();
    for (String[] psFields : psResult) {
      Matcher jobIdMatcher = JOB_ID_PATTERN.matcher(psFields[COMMAND]);
      if (jobIdMatcher.find()) {
        String jobID = jobIdMatcher.group(1);
        if (!jobIdToUsage.containsKey(jobID)) {
          jobIdToUsage.put(jobID, new double[2]);
        }
      }
    }
    jobIdToUsage.put(TASK_TRACKER_KEY, taskTrackerUsage);

    // Charge each direct child subtree of a TaskTracker to its job, or to
    // the TaskTracker itself when the child's command carries no job id.
    for (String ttPid : taskTrackerPidList) {
      for (String pid : pidToChildPid.get(ttPid)) {
        String[] psFields = pidToContent.get(pid);
        Matcher jobIdMatcher = JOB_ID_PATTERN.matcher(psFields[COMMAND]);
        double[] subtreeUsage =
            getSubProcessUsage(pid, pidToContent, pidToChildPid);
        double[] target = jobIdMatcher.find()
            ? jobIdToUsage.get(jobIdMatcher.group(1))
            : taskTrackerUsage;
        target[0] += subtreeUsage[0];
        target[1] += subtreeUsage[1];
      }
    }

    // Write job information to TaskTrackerReport. The array is assigned to
    // the field only once it is completely filled in.
    LocalJobUtilization[] report =
        new LocalJobUtilization[jobIdToUsage.size()];
    int jobReportIndex = 0;
    for (Map.Entry<String, double[]> entry : jobIdToUsage.entrySet()) {
      LocalJobUtilization jobReport = new LocalJobUtilization();
      jobReport.setJobId(entry.getKey());
      jobReport.setCpuUsageGHz(entry.getValue()[0]);
      jobReport.setMemUsageGB(entry.getValue()[1]);
      report[jobReportIndex++] = jobReport;
    }
    localJobUtilization = report;
  }

  /**
   * Computes the memory and CPU usage of a process and all of its
   * descendants, using a breadth-first walk of the process tree.
   *
   * @param pid PID of the process we are interested in
   * @param pidToContent Map between pid and the PS content
   * @param pidToChildPid Map between pid and pid of its child process
   * @return A 2-element array which contains CPU usage (GHz) at index 0 and
   *         memory usage (GB) at index 1
   */
  private double[] getSubProcessUsage(String pid,
      Map<String, String[]> pidToContent,
      Map<String, LinkedList<String>> pidToChildPid) {
    double[] cpuMemUsage = new double[2];
    Queue<String> pidQueue = new LinkedList<String>();
    pidQueue.add(pid);
    while (!pidQueue.isEmpty()) {
      String current = pidQueue.poll();
      for (String child : pidToChildPid.get(current)) {
        pidQueue.add(child);
      }
      String[] psContent = pidToContent.get(current);
      // "ps -eo pcpu" gives per core %. We convert it to GHz.
      cpuMemUsage[0] += percentageToGHz(parseDoubleOrZero(psContent[PCPU]));
      // "ps -eo rss" gives memory in kB. We convert it to GB.
      cpuMemUsage[1] += parseDoubleOrZero(psContent[RSS]) / KB_PER_GB;
    }
    return cpuMemUsage;
  }
}