/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.corona.CoronaConf; import org.apache.hadoop.corona.NodeUsageReport; /** * Maintains statistics about all the tasks. The Corona Job Tracker can call the * record functions to update various statistics. It can find if a tracker is * blacklisted or get a summary of the statistics. Note that the all the * recording functions require the tracker name, which is obtained through the * node name of resource grants. This is different from the tracker host. */ public class TrackerStats { /** Usage report for a tracker. */ private final Map<String, NodeUsageReport> usageReports = new HashMap<String, NodeUsageReport>(); /** Max failed connections before consider a tracker faulty for this job. */ private final int maxFailedConnections; /** Max failed tasks before consider a tracker faulty for this job. */ private final int maxFailures; /** The list of trackers that were declared dead */ private final Set<String> deadTrackers = new HashSet<String>(); /** * Constructor. * @param conf The configuration. */ public TrackerStats(Configuration conf) { CoronaConf coronaConf = new CoronaConf(conf); maxFailedConnections = coronaConf.getMaxFailedConnectionsPerSession(); maxFailures = coronaConf.getMaxFailuresPerSession(); } /** * Check if a tracker is faulty. * @param trackerName The name of the tracker. * @return A boolean indicating if the tracker is faulty. */ public boolean isFaulty(String trackerName) { synchronized (this) { NodeUsageReport usageReport = usageReports.get(trackerName); return isDeadTracker(trackerName) || (usageReport != null && (usageReport.getNumFailedConnections() > maxFailedConnections || usageReport.getNumFailed() > maxFailures)); } } /** * Get the number of faulty trackers. * @return the number of faulty trackers. */ public int getNumFaultyTrackers() { int count = 0; synchronized (this) { for (String trackerName : usageReports.keySet()) { if (isFaulty(trackerName)) { count++; } } } return count; } /** * This tracker has been declared dead by some external force * and we are recording it for future reference * * @param trackerName the name of the tracker that was declared dead */ public void recordDeadTracker(String trackerName) { synchronized (this) { deadTrackers.add(trackerName); } } /** * Has the tracker been declared dead * * @param trackerName the name of the tracker to check for deadness * @return true if the tracker has been declared dead, false otherwise */ public boolean isDeadTracker(String trackerName) { synchronized (this) { return deadTrackers.contains(trackerName); } } /** * Increment the number of tasks assigned to a tracker. * @param trackerName The name of the tracker. */ public void recordTask(String trackerName) { synchronized (this) { NodeUsageReport usageReport = getReportUnprotected(trackerName); usageReport.setNumTotalTasks(usageReport.getNumTotalTasks() + 1); } } /** * Increment the number of succeeded tasks on a tracker. * @param trackerName The name of the tracker. */ public void recordSucceededTask(String trackerName) { synchronized (this) { NodeUsageReport usageReport = getReportUnprotected(trackerName); usageReport.setNumSucceeded(usageReport.getNumSucceeded() + 1); } } /** * Increment the number of killed tasks on a tracker. * @param trackerName The name of the tracker. */ public void recordKilledTask(String trackerName) { synchronized (this) { NodeUsageReport usageReport = getReportUnprotected(trackerName); usageReport.setNumKilled(usageReport.getNumKilled() + 1); } } /** * Increment the number of failed tasks on a tracker. * @param trackerName The name of the tracker. */ public void recordFailedTask(String trackerName) { synchronized (this) { NodeUsageReport usageReport = getReportUnprotected(trackerName); usageReport.setNumFailed(usageReport.getNumFailed() + 1); } } /** * Set the number of failed tasks on a tracker to 0. */ public void resetFailedCount() { synchronized (this) { for (NodeUsageReport report : usageReports.values()) { report.setNumFailed(0); } } } /** * Increment the number of timeouts (expired launch) on a tracker. * @param trackerName The name of the tracker. */ public void recordTimeout(String trackerName) { synchronized (this) { NodeUsageReport usageReport = getReportUnprotected(trackerName); usageReport.setNumTimeout(usageReport.getNumTimeout() + 1); } } /** * Increment the number of tasks that ran slowly on a tracker. * @param trackerName The name of the tracker. */ public void recordSlowTask(String trackerName) { synchronized (this) { NodeUsageReport usageReport = getReportUnprotected(trackerName); usageReport.setNumSlow(usageReport.getNumSlow() + 1); } } /** * Increment the number of connection errors encountered on a tracker. * @param trackerName The name of the tracker. */ public void recordConnectionError(String trackerName) { synchronized (this) { NodeUsageReport usageReport = getReportUnprotected(trackerName); usageReport .setNumFailedConnections(usageReport.getNumFailedConnections() + 1); } } /** * Get the usage reports for all trackers. * @return A list of {@link NodeUsageReport}. */ public List<NodeUsageReport> getNodeUsageReports() { synchronized (this) { return new ArrayList<NodeUsageReport>(usageReports.values()); } } /** * Get the usage report for a tracker. * @param trackerName The name of the tracker. * @return The {@link NodeUsageReport} for the tracker. */ private NodeUsageReport getReportUnprotected(String trackerName) { NodeUsageReport usageReport = usageReports.get(trackerName); if (usageReport == null) { usageReport = new NodeUsageReport(trackerName, 0, 0, 0, 0, 0, 0, 0); usageReports.put(trackerName, usageReport); } return usageReport; } }