/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** MODIFIED FOR GPGPU Usage! **/ package org.apache.hadoop.mapred; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.io.Writer; import java.lang.management.ManagementFactory; import java.net.BindException; import java.net.InetSocketAddress; import java.net.UnknownHostException; import java.security.PrivilegedExceptionAction; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; import java.util.Vector; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import org.apache.commons.logging.Log; import 
org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.http.HttpServer; import org.apache.hadoop.io.Text; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RPC.VersionMismatch; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.mapred.AuditLogger.Constants; import org.apache.hadoop.mapred.JobHistory.Keys; import org.apache.hadoop.mapred.JobHistory.Listener; import org.apache.hadoop.mapred.JobHistory.Values; import org.apache.hadoop.mapred.JobInProgress.KillInterruptedException; import org.apache.hadoop.mapred.JobStatusChangeEvent.EventType; import org.apache.hadoop.mapred.QueueManager.QueueACL; import org.apache.hadoop.mapred.TaskTrackerStatus.TaskTrackerHealthStatus; import org.apache.hadoop.mapreduce.ClusterMetrics; import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.security.token.DelegationTokenRenewal; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.mapreduce.server.jobtracker.TaskTracker; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.util.MBeans; import org.apache.hadoop.net.DNSToSwitchMapping; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.net.Node; import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.net.ScriptBasedMapping; import 
org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.Groups; import org.apache.hadoop.security.RefreshUserMappingsProtocol; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.security.authorize.AuthorizationException; import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; import org.apache.hadoop.security.authorize.ServiceAuthorizationManager; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.HostsFileReader; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.VersionInfo; import org.mortbay.util.ajax.JSON; /******************************************************* * JobTracker is the central location for submitting and * tracking MR jobs in a network environment. * *******************************************************/ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmissionProtocol, TaskTrackerManager, RefreshUserMappingsProtocol, RefreshAuthorizationPolicyProtocol, AdminOperationsProtocol, JobTrackerMXBean { static{ Configuration.addDefaultResource("mapred-default.xml"); Configuration.addDefaultResource("mapred-site.xml"); } static long TASKTRACKER_EXPIRY_INTERVAL = 10 * 60 * 1000; static long RETIRE_JOB_INTERVAL; static long RETIRE_JOB_CHECK_INTERVAL; private final long DELEGATION_TOKEN_GC_INTERVAL = 3600000; // 1 hour private final DelegationTokenSecretManager secretManager; // The maximum fraction (range [0.0-1.0]) of nodes in cluster allowed to be // added to the all-jobs blacklist via heuristics. 
// By default, no more than
// 50% of the cluster can be heuristically blacklisted, but the external
// node-healthcheck script is not affected by this.
private static double MAX_BLACKLIST_FRACTION = 0.5;

// A tracker is blacklisted across jobs only if number of faults is more
// than X% above the average number of faults (averaged across all nodes
// in cluster).  X is the blacklist threshold here; 0.3 would correspond
// to 130% of the average, for example.
private double AVERAGE_BLACKLIST_THRESHOLD = 0.5;

// Fault threshold (number occurring within TRACKER_FAULT_TIMEOUT_WINDOW)
// to consider a task tracker bad enough to blacklist heuristically.  This
// is functionally the same as the older "MAX_BLACKLISTS_PER_TRACKER" value.
// No initializer here: the value is assigned outside this declaration.
private int TRACKER_FAULT_THRESHOLD; // = 4;

// Width of overall fault-tracking sliding window (in minutes). (Default
// of 24 hours matches previous "UPDATE_FAULTY_TRACKER_INTERVAL" value that
// was used to forgive a single fault if no others occurred in the interval.)
// NOTE(review): the text above says 24 hours while the trailing note says
// 180 minutes (3 hours) -- confirm which default is actually configured.
private int TRACKER_FAULT_TIMEOUT_WINDOW; // = 180 (3 hours)

// Width of a single fault-tracking bucket (in minutes).
private int TRACKER_FAULT_BUCKET_WIDTH;   // = 15
// Bucket width in milliseconds (per the name); this is the value handed to
// FaultInfo as its bucketWidth.
private long TRACKER_FAULT_BUCKET_WIDTH_MSECS;

// Basically TRACKER_FAULT_TIMEOUT_WINDOW / TRACKER_FAULT_BUCKET_WIDTH .
private int NUM_FAULT_BUCKETS;

/** the maximum allowed size of the jobconf (5 MB by default) **/
long MAX_JOBCONF_SIZE = 5*1024*1024L;
/** the config key for max user jobconf size **/
public static final String MAX_USER_JOBCONF_SIZE_KEY =
    "mapred.user.jobconf.limit";

// Delegation token related keys (all default durations are in milliseconds)
public static final String DELEGATION_KEY_UPDATE_INTERVAL_KEY =
    "mapreduce.cluster.delegation.key.update-interval";
public static final long DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT =
    24*60*60*1000; // 1 day
public static final String DELEGATION_TOKEN_RENEW_INTERVAL_KEY =
    "mapreduce.cluster.delegation.token.renew-interval";
public static final long DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT =
    24*60*60*1000;  // 1 day
public static final String DELEGATION_TOKEN_MAX_LIFETIME_KEY =
    "mapreduce.cluster.delegation.token.max-lifetime";
// The product below is evaluated in int arithmetic; 604,800,000 still fits
// in an int, so there is no overflow before the widening to long.
public static final long DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT =
    7*24*60*60*1000; // 7 days

// Approximate number of heartbeats that could arrive JobTracker
// in a second
static final String JT_HEARTBEATS_IN_SECOND = "mapred.heartbeats.in.second";
private int NUM_HEARTBEATS_IN_SECOND;
private static final int DEFAULT_NUM_HEARTBEATS_IN_SECOND = 100;
private static final int MIN_NUM_HEARTBEATS_IN_SECOND = 1;

// Scaling factor for heartbeats, used for testing only
static final String JT_HEARTBEATS_SCALING_FACTOR =
    "mapreduce.jobtracker.heartbeats.scaling.factor";
private float HEARTBEATS_SCALING_FACTOR;
private final float MIN_HEARTBEATS_SCALING_FACTOR = 0.01f;
private final float DEFAULT_HEARTBEATS_SCALING_FACTOR = 1.0f;

// Lifecycle state of the JobTracker; it starts out as INITIALIZING.
public static enum State { INITIALIZING, RUNNING }
State state = State.INITIALIZING;
// presumably milliseconds between file-system access retries -- TODO confirm
private static final int FS_ACCESS_RETRY_PERIOD = 10000;

static final String JOB_INFO_FILE = "job-info";

private DNSToSwitchMapping dnsToSwitchMapping;
private NetworkTopology clusterMap = new NetworkTopology();
private int numTaskCacheLevels; // the max level to which we cache tasks

/**
 * {@link #nodesAtMaxLevel} is using the keySet from {@link ConcurrentHashMap}
 * so
that it can be safely written to and iterated on via 2 separate threads.
 * Note: It can only be iterated from a single thread which is feasible since
 *       the only iteration is done in {@link JobInProgress} under the
 *       {@link JobTracker} lock.
 */
private Set<Node> nodesAtMaxLevel =
    Collections.newSetFromMap(new ConcurrentHashMap<Node, Boolean>());
private final TaskScheduler taskScheduler;
// Copy-on-write list: listeners are iterated while jobs are being retired.
private final List<JobInProgressListener> jobInProgressListeners =
    new CopyOnWriteArrayList<JobInProgressListener>();

private static final LocalDirAllocator lDirAlloc =
    new LocalDirAllocator("mapred.local.dir");
// system directory is completely owned by the JobTracker
// (0700 is an octal literal: owner read/write/execute only)
final static FsPermission SYSTEM_DIR_PERMISSION =
    FsPermission.createImmutable((short) 0700); // rwx------

// system files should have 700 permission
final static FsPermission SYSTEM_FILE_PERMISSION =
    FsPermission.createImmutable((short) 0700); // rwx------

// Time source used by the expiry/retire threads and by fault tracking.
private Clock clock;

private final JobTokenSecretManager jobTokenSecretManager
    = new JobTokenSecretManager();

/**
 * @return the secret manager instance created together with this JobTracker
 */
JobTokenSecretManager getJobTokenSecretManager() {
  return jobTokenSecretManager;
}

/**
 * A client tried to submit a job before the Job Tracker was ready.
 *
 * Note that inside JobTracker this nested class shadows
 * {@code java.lang.IllegalStateException}; unlike that class it is a
 * checked exception because it extends {@link IOException}.
 */
public static class IllegalStateException extends IOException {

  private static final long serialVersionUID = 1L;

  /**
   * @param msg detail message passed through to {@link IOException}
   */
  public IllegalStateException(String msg) {
    super(msg);
  }
}

/**
 * The maximum no. of 'completed' (successful/failed/killed)
 * jobs kept in memory per-user.
 */
final int MAX_COMPLETE_USER_JOBS_IN_MEMORY;

/**
 * The minimum time (in ms) that a job's information has to remain
 * in the JobTracker's memory before it is retired.
 */
static final int MIN_TIME_BEFORE_RETIRE = 0;

private int nextJobId = 1;

public static final Log LOG = LogFactory.getLog(JobTracker.class);

static final String CONF_VERSION_KEY = "mapreduce.jobtracker.conf.version";
static final String CONF_VERSION_DEFAULT = "default";

/**
 * @return the clock this JobTracker reads the current time from
 */
public Clock getClock() {
  return clock;
}

/**
 * Start the JobTracker with given configuration.
* * The conf will be modified to reflect the actual ports on which * the JobTracker is up and running if the user passes the port as * <code>zero</code>. * * @param conf configuration for the JobTracker. * @throws IOException */ public static JobTracker startTracker(JobConf conf ) throws IOException, InterruptedException { return startTracker(conf, generateNewIdentifier()); } public static JobTracker startTracker(JobConf conf, String identifier) throws IOException, InterruptedException { DefaultMetricsSystem.initialize("JobTracker"); JobTracker result = null; while (true) { try { result = new JobTracker(conf, identifier); result.taskScheduler.setTaskTrackerManager(result); break; } catch (VersionMismatch e) { throw e; } catch (BindException e) { throw e; } catch (UnknownHostException e) { throw e; } catch (AccessControlException ace) { // in case of jobtracker not having right access // bail out throw ace; } catch (IOException e) { LOG.warn("Error starting tracker: " + StringUtils.stringifyException(e)); } Thread.sleep(1000); } if (result != null) { JobEndNotifier.startNotifier(); MBeans.register("JobTracker", "JobTrackerInfo", result); } return result; } public void stopTracker() throws IOException { JobEndNotifier.stopNotifier(); close(); } public long getProtocolVersion(String protocol, long clientVersion) throws IOException { if (protocol.equals(InterTrackerProtocol.class.getName())) { return InterTrackerProtocol.versionID; } else if (protocol.equals(JobSubmissionProtocol.class.getName())){ return JobSubmissionProtocol.versionID; } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){ return RefreshAuthorizationPolicyProtocol.versionID; } else if (protocol.equals(AdminOperationsProtocol.class.getName())){ return AdminOperationsProtocol.versionID; } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){ return RefreshUserMappingsProtocol.versionID; } else { throw new IOException("Unknown protocol to job tracker: " + 
protocol); } } public DelegationTokenSecretManager getDelegationTokenSecretManager() { return secretManager; } /** * A thread to timeout tasks that have been assigned to task trackers, * but that haven't reported back yet. * Note that I included a stop() method, even though there is no place * where JobTrackers are cleaned up. */ private class ExpireLaunchingTasks implements Runnable { /** * This is a map of the tasks that have been assigned to task trackers, * but that have not yet been seen in a status report. * map: task-id -> time-assigned */ private Map<TaskAttemptID, Long> launchingTasks = new LinkedHashMap<TaskAttemptID, Long>(); public void run() { while (true) { try { // Every 3 minutes check for any tasks that are overdue Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL/3); long now = clock.getTime(); if(LOG.isDebugEnabled()) { LOG.debug("Starting launching task sweep"); } synchronized (JobTracker.this) { synchronized (launchingTasks) { Iterator<Map.Entry<TaskAttemptID, Long>> itr = launchingTasks.entrySet().iterator(); while (itr.hasNext()) { Map.Entry<TaskAttemptID, Long> pair = itr.next(); TaskAttemptID taskId = pair.getKey(); long age = now - (pair.getValue()).longValue(); LOG.info(taskId + " is " + age + " ms debug."); if (age > TASKTRACKER_EXPIRY_INTERVAL) { LOG.info("Launching task " + taskId + " timed out."); TaskInProgress tip = null; tip = taskidToTIPMap.get(taskId); if (tip != null) { JobInProgress job = tip.getJob(); String trackerName = getAssignedTracker(taskId); TaskTrackerStatus trackerStatus = getTaskTrackerStatus(trackerName); // This might happen when the tasktracker has already // expired and this thread tries to call failedtask // again. expire tasktracker should have called failed // task! if (trackerStatus != null) job.failedTask(tip, taskId, "Error launching task", tip.isMapTask()? 
TaskStatus.Phase.MAP: TaskStatus.Phase.STARTING, TaskStatus.State.FAILED, trackerName); } itr.remove(); } else { // the tasks are sorted by start time, so once we find // one that we want to keep, we are done for this cycle. break; } } } } } catch (InterruptedException ie) { // all done break; } catch (Exception e) { LOG.error("Expire Launching Task Thread got exception: " + StringUtils.stringifyException(e)); } } } public void addNewTask(TaskAttemptID taskName) { synchronized (launchingTasks) { launchingTasks.put(taskName, clock.getTime()); } } public void removeTask(TaskAttemptID taskName) { synchronized (launchingTasks) { launchingTasks.remove(taskName); } } } /////////////////////////////////////////////////////// // Used to expire TaskTrackers that have gone down /////////////////////////////////////////////////////// class ExpireTrackers implements Runnable { public ExpireTrackers() { } /** * The run method lives for the life of the JobTracker, and removes TaskTrackers * that have not checked in for some time. */ public void run() { while (true) { try { // // Thread runs periodically to check whether trackers should be expired. // The sleep interval must be no more than half the maximum expiry time // for a task tracker. 
// Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL / 3); // // Loop through all expired items in the queue // // Need to lock the JobTracker here since we are // manipulating it's data-structures via // ExpireTrackers.run -> JobTracker.lostTaskTracker -> // JobInProgress.failedTask -> JobTracker.markCompleteTaskAttempt // Also need to lock JobTracker before locking 'taskTracker' & // 'trackerExpiryQueue' to prevent deadlock: // @see {@link JobTracker.processHeartbeat(TaskTrackerStatus, boolean, long)} synchronized (JobTracker.this) { synchronized (taskTrackers) { synchronized (trackerExpiryQueue) { long now = clock.getTime(); TaskTrackerStatus leastRecent = null; while ((trackerExpiryQueue.size() > 0) && (leastRecent = trackerExpiryQueue.first()) != null && ((now - leastRecent.getLastSeen()) > TASKTRACKER_EXPIRY_INTERVAL)) { // Remove profile from head of queue trackerExpiryQueue.remove(leastRecent); String trackerName = leastRecent.getTrackerName(); // Figure out if last-seen time should be updated, or if tracker is dead TaskTracker current = getTaskTracker(trackerName); TaskTrackerStatus newProfile = (current == null ) ? null : current.getStatus(); // Items might leave the taskTracker set through other means; the // status stored in 'taskTrackers' might be null, which means the // tracker has already been destroyed. 
if (newProfile != null) { if ((now - newProfile.getLastSeen()) > TASKTRACKER_EXPIRY_INTERVAL) { removeTracker(current); // remove the mapping from the hosts list String hostname = newProfile.getHost(); hostnameToTaskTracker.get(hostname).remove(trackerName); } else { // Update time by inserting latest profile trackerExpiryQueue.add(newProfile); } } } } } } } catch (InterruptedException iex) { break; } catch (Exception t) { LOG.error("Tracker Expiry Thread got exception: " + StringUtils.stringifyException(t)); } } } } synchronized void historyFileCopied(JobID jobid, String historyFile) { JobInProgress job = getJob(jobid); if (job != null) { //found in main cache if (historyFile != null) { job.setHistoryFile(historyFile); } return; } RetireJobInfo jobInfo = retireJobs.get(jobid); if (jobInfo != null) { //found in retired cache if (historyFile != null) { jobInfo.setHistoryFile(historyFile); } } } static class RetireJobInfo { final JobStatus status; final JobProfile profile; final long finishTime; final Counters counters; private String historyFile; RetireJobInfo(Counters counters, JobStatus status, JobProfile profile, long finishTime, String historyFile) { this.counters = counters; this.status = status; this.profile = profile; this.finishTime = finishTime; this.historyFile = historyFile; } void setHistoryFile(String file) { this.historyFile = file; } String getHistoryFile() { return historyFile; } } /////////////////////////////////////////////////////// // Used to remove old finished Jobs that have been around for too long /////////////////////////////////////////////////////// class RetireJobs implements Runnable { private final Map<JobID, RetireJobInfo> jobIDStatusMap = new HashMap<JobID, RetireJobInfo>(); private final LinkedList<RetireJobInfo> jobRetireInfoQ = new LinkedList<RetireJobInfo>(); public RetireJobs() { } synchronized void addToCache(JobInProgress job) { Counters counters = new Counters(); boolean isFine = job.getCounters(counters); counters = (isFine? 
counters: new Counters()); RetireJobInfo info = new RetireJobInfo(counters, job.getStatus(), job.getProfile(), job.getFinishTime(), job.getHistoryFile()); jobRetireInfoQ.add(info); jobIDStatusMap.put(info.status.getJobID(), info); if (jobRetireInfoQ.size() > retiredJobsCacheSize) { RetireJobInfo removed = jobRetireInfoQ.remove(); jobIDStatusMap.remove(removed.status.getJobID()); LOG.info("Retired job removed from cache " + removed.status.getJobID()); } } synchronized RetireJobInfo get(JobID jobId) { return jobIDStatusMap.get(jobId); } @SuppressWarnings("unchecked") synchronized LinkedList<RetireJobInfo> getAll() { return (LinkedList<RetireJobInfo>) jobRetireInfoQ.clone(); } synchronized LinkedList<JobStatus> getAllJobStatus() { LinkedList<JobStatus> list = new LinkedList<JobStatus>(); for (RetireJobInfo info : jobRetireInfoQ) { list.add(info.status); } return list; } private boolean minConditionToRetire(JobInProgress job, long now) { return job.getStatus().getRunState() != JobStatus.RUNNING && job.getStatus().getRunState() != JobStatus.PREP && (job.getFinishTime() + MIN_TIME_BEFORE_RETIRE < now); } /** * The run method lives for the life of the JobTracker, * and removes Jobs that are not still running, but which * finished a long time ago. 
*/ public void run() { while (true) { try { Thread.sleep(RETIRE_JOB_CHECK_INTERVAL); List<JobInProgress> retiredJobs = new ArrayList<JobInProgress>(); long now = clock.getTime(); long retireBefore = now - RETIRE_JOB_INTERVAL; synchronized (jobs) { for(JobInProgress job: jobs.values()) { if (minConditionToRetire(job, now) && (job.getFinishTime() < retireBefore)) { retiredJobs.add(job); } } } synchronized (userToJobsMap) { Iterator<Map.Entry<String, ArrayList<JobInProgress>>> userToJobsMapIt = userToJobsMap.entrySet().iterator(); while (userToJobsMapIt.hasNext()) { Map.Entry<String, ArrayList<JobInProgress>> entry = userToJobsMapIt.next(); ArrayList<JobInProgress> userJobs = entry.getValue(); Iterator<JobInProgress> it = userJobs.iterator(); while (it.hasNext() && userJobs.size() > MAX_COMPLETE_USER_JOBS_IN_MEMORY) { JobInProgress jobUser = it.next(); if (retiredJobs.contains(jobUser)) { LOG.info("Removing from userToJobsMap: " + jobUser.getJobID()); it.remove(); } else if (minConditionToRetire(jobUser, now)) { LOG.info("User limit exceeded. 
Marking job: " + jobUser.getJobID() + " for retire."); retiredJobs.add(jobUser); it.remove(); } } if (userJobs.isEmpty()) { userToJobsMapIt.remove(); } } } if (!retiredJobs.isEmpty()) { synchronized (JobTracker.this) { synchronized (jobs) { synchronized (taskScheduler) { for (JobInProgress job: retiredJobs) { removeJobTasks(job); jobs.remove(job.getProfile().getJobID()); for (JobInProgressListener l : jobInProgressListeners) { l.jobRemoved(job); } String jobUser = job.getProfile().getUser(); LOG.info("Retired job with id: '" + job.getProfile().getJobID() + "' of user '" + jobUser + "'"); // clean up job files from the local disk JobHistory.JobInfo.cleanupJob(job.getProfile().getJobID()); addToCache(job); } } } } } } catch (InterruptedException t) { break; } catch (Throwable t) { LOG.error("Error in retiring job:\n" + StringUtils.stringifyException(t)); } } } } enum ReasonForBlackListing { EXCEEDING_FAILURES, NODE_UNHEALTHY } // FaultInfo: data structure that tracks the number of faults of a single // TaskTracker, when the last fault occurred, and whether the TaskTracker // is blacklisted across all jobs or not. 
private static class FaultInfo { static final String FAULT_FORMAT_STRING = "%d failures on the tracker"; int[] numFaults; // timeslice buckets long lastRotated; // 1st millisecond of current bucket boolean blacklisted; boolean graylisted; private int numFaultBuckets; private long bucketWidth; private HashMap<ReasonForBlackListing, String> blackRfbMap; private HashMap<ReasonForBlackListing, String> grayRfbMap; FaultInfo(long time, int numFaultBuckets, long bucketWidth) { this.numFaultBuckets = numFaultBuckets; this.bucketWidth = bucketWidth; numFaults = new int[numFaultBuckets]; lastRotated = (time / bucketWidth) * bucketWidth; blacklisted = false; graylisted = false; blackRfbMap = new HashMap<ReasonForBlackListing, String>(); grayRfbMap = new HashMap<ReasonForBlackListing, String>(); } // timeStamp is presumed to be "now": there are no checks for past or // future values, etc. private void checkRotation(long timeStamp) { long diff = timeStamp - lastRotated; // find index of the oldest bucket(s) and zero it (or them) out while (diff > bucketWidth) { // this is now the 1st millisecond of the oldest bucket, in a modular- // arithmetic sense (i.e., about to become the newest bucket): lastRotated += bucketWidth; // corresponding bucket index: int idx = (int)((lastRotated / bucketWidth) % numFaultBuckets); // clear the bucket's contents in preparation for new faults numFaults[idx] = 0; diff -= bucketWidth; } } private int bucketIndex(long timeStamp) { // stupid Java compiler thinks an int modulus can produce a long, sigh... 
return (int)((timeStamp / bucketWidth) % numFaultBuckets); } // no longer any need for corresponding decrFaultCount() method since we // implicitly auto-decrement when oldest bucket's contents get wiped on // rotation void incrFaultCount(long timeStamp) { checkRotation(timeStamp); ++numFaults[bucketIndex(timeStamp)]; } int getFaultCount(long timeStamp) { checkRotation(timeStamp); int faultCount = 0; for (int faults : numFaults) { faultCount += faults; } return faultCount; } boolean isBlacklisted() { return blacklisted; } boolean isGraylisted() { return graylisted; } void setBlacklist(ReasonForBlackListing rfb, String trackerFaultReport, boolean gray) { if (gray) { graylisted = true; this.grayRfbMap.put(rfb, trackerFaultReport); } else { blacklisted = true; this.blackRfbMap.put(rfb, trackerFaultReport); } } public String getTrackerBlackOrGraylistReport(boolean gray) { StringBuffer sb = new StringBuffer(); HashMap<ReasonForBlackListing, String> rfbMap = new HashMap<ReasonForBlackListing, String>(); rfbMap.putAll(gray? grayRfbMap : blackRfbMap); for (String reasons : rfbMap.values()) { sb.append(reasons); sb.append("\n"); } return sb.toString(); } Set<ReasonForBlackListing> getReasonForBlacklisting(boolean gray) { return (gray? this.grayRfbMap.keySet() : this.blackRfbMap.keySet()); } // no longer on the blacklist (or graylist), but we're still tracking any // faults in case issue is intermittent => don't clear numFaults[] public void unBlacklist(boolean gray) { if (gray) { graylisted = false; grayRfbMap.clear(); } else { blacklisted = false; blackRfbMap.clear(); } } public boolean removeBlacklistedReason(ReasonForBlackListing rfb, boolean gray) { String str = (gray? 
grayRfbMap.remove(rfb) : blackRfbMap.remove(rfb)); return str!=null; } public void addBlacklistedReason(ReasonForBlackListing rfb, String reason, boolean gray) { if (gray) { grayRfbMap.put(rfb, reason); } else { blackRfbMap.put(rfb, reason); } } } private class FaultyTrackersInfo { // A map from hostName to its faults private Map<String, FaultInfo> potentiallyFaultyTrackers = new HashMap<String, FaultInfo>(); // This count gives the number of blacklisted trackers in the cluster // at any time. This is maintained to avoid iteration over // the potentiallyFaultyTrackers to get blacklisted trackers. And also // this count doesn't include blacklisted trackers which are lost, // although the fault info is maintained for lost trackers. private volatile int numBlacklistedTrackers = 0; private volatile int numGraylistedTrackers = 0; /** * Increments faults(blacklist by job) for the tracker by one. * * Adds the tracker to the potentially faulty list. * Assumes JobTracker is locked on the entry. * * @param hostName */ void incrementFaults(String hostName) { synchronized (potentiallyFaultyTrackers) { long now = clock.getTime(); FaultInfo fi = getFaultInfo(hostName, true); fi.incrFaultCount(now); // check heuristics, and add to the graylist if over the limit: if (exceedsFaults(fi, now)) { LOG.info("Adding " + hostName + " to the graylist across all jobs"); String reason = String.format(FaultInfo.FAULT_FORMAT_STRING, fi.getFaultCount(now)); blacklistTracker(hostName, reason, ReasonForBlackListing.EXCEEDING_FAILURES, true); } } } /** * Graylists the tracker across all jobs (similar to blacklisting except * not actually removed from service) if all of the following heuristics * hold: * <ol> * <li>number of faults within TRACKER_FAULT_TIMEOUT_WINDOW is greater * than or equal to TRACKER_FAULT_THRESHOLD (per-job blacklistings) * (both configurable)</li> * <li>number of faults (per-job blacklistings) for given node is more * than (1 + AVERAGE_BLACKLIST_THRESHOLD) times the average 
number * of faults across all nodes (configurable)</li> * <li>less than 50% of the cluster is blacklisted (NOT configurable)</li> * </ol> * Note that the node health-check script is not explicitly limited by * the 50%-blacklist limit. */ // this is the sole source of "heuristic blacklisting" == graylisting private boolean exceedsFaults(FaultInfo fi, long timeStamp) { int faultCount = fi.getFaultCount(timeStamp); if (faultCount >= TRACKER_FAULT_THRESHOLD) { // calculate average faults across all nodes long clusterSize = getClusterStatus().getTaskTrackers(); long sum = 0; for (FaultInfo f : potentiallyFaultyTrackers.values()) { sum += f.getFaultCount(timeStamp); } double avg = (double) sum / clusterSize; // avg num faults per node // graylisted trackers are already included in clusterSize: long totalCluster = clusterSize + numBlacklistedTrackers; if ((faultCount - avg) > (AVERAGE_BLACKLIST_THRESHOLD * avg) && numGraylistedTrackers < (totalCluster * MAX_BLACKLIST_FRACTION)) { return true; } } return false; } private void incrBlacklistedTrackers(int count) { LOG.info("Incrementing blacklisted trackers by " + count); numBlacklistedTrackers += count; getInstrumentation().addBlackListedTrackers(count); } private void decrBlacklistedTrackers(int count) { LOG.info("Decrementing blacklisted trackers by " + count); numBlacklistedTrackers -= count; getInstrumentation().decBlackListedTrackers(count); } private void incrGraylistedTrackers(int count) { LOG.info("Incrementing graylisted trackers by " + count); numGraylistedTrackers += count; getInstrumentation().addGrayListedTrackers(count); } private void decrGraylistedTrackers(int count) { LOG.info("Decrementing graylisted trackers by " + count); numGraylistedTrackers -= count; getInstrumentation().decGrayListedTrackers(count); } // This may be called either as a result of the node health-check script // or because of heuristics based on single-job blacklist info. 
/**
     * Adds a reason for black- or graylisting a host, listing the host first
     * if it is not listed yet in the requested shade.
     *
     * For an already-listed host only the reason is added/overwritten. For a
     * newly graylisted host the graylist counter is raised by the number of
     * trackers on that host. For a newly blacklisted host all slot
     * reservations held by its trackers are cancelled and the host's slot
     * capacity is removed from the cluster totals.
     *
     * @param hostName host to list
     * @param reason   human-readable details, stored with the reason code
     * @param rfb      reason code
     * @param gray     true for graylisting, false for blacklisting
     */
    private void blacklistTracker(String hostName, String reason,
                                  ReasonForBlackListing rfb,
                                  boolean gray) {
      FaultInfo fi = getFaultInfo(hostName, true);
      String shade = gray? "gray" : "black";
      boolean listed = gray? fi.isGraylisted() : fi.isBlacklisted();
      if (listed) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Adding/overwriting reason for " + shade +
              "listed tracker : " + hostName + " Reason for " + shade +
              "listing is : " + rfb + " Reason details : " + reason);
        }
        if (!fi.getReasonForBlacklisting(gray).contains(rfb)) {
          LOG.info("Adding new reason for " + shade + "listed tracker : " +
              hostName + " Reason for " + shade + "listing is : " + rfb
              + " Reason details : " + reason);
        }
        fi.addBlacklistedReason(rfb, reason, gray);
      } else {
        LOG.info("Adding new " + shade + "listed tracker : " + hostName
            + " Reason for " + shade + "listing is : " + rfb
            + " Reason details : " + reason);
        if (gray) {
          incrGraylistedTrackers(getNumTaskTrackersOnHost(hostName));
        } else {
          Set<TaskTracker> trackers =
            hostnameToTaskTracker.get(hostName);
          // The map has no entry for a host without registered trackers;
          // synchronizing on null would throw and leave the host unmarked.
          if (trackers != null) {
            synchronized (trackers) {
              for (TaskTracker tracker : trackers) {
                tracker.cancelAllReservations();
              }
            }
          }
          removeHostCapacity(hostName);
        }
        fi.setBlacklist(rfb, reason, gray);
      }
    }

    /**
     * Check whether tasks can be assigned to the tracker.
     *
     * Faults are stored in a multi-bucket, circular sliding window; when
     * the implicit "time pointer" moves across a bucket boundary into the
     * oldest bucket, that bucket's faults are cleared, and it becomes the
     * newest ("current") bucket. Thus TRACKER_FAULT_TIMEOUT_WINDOW
     * determines the timeout value for TaskTracker faults (in combination
     * with TRACKER_FAULT_BUCKET_WIDTH), and the sum over all buckets is
     * compared with TRACKER_FAULT_THRESHOLD to determine whether graylisting
     * is warranted (or, alternatively, if it should be lifted).
     *
     * Assumes JobTracker is locked on entry.
* * @param hostName The tracker name * @param now The current time (milliseconds) */ void checkTrackerFaultTimeout(String hostName, long now) { synchronized (potentiallyFaultyTrackers) { FaultInfo fi = potentiallyFaultyTrackers.get(hostName); // getFaultCount() auto-rotates the buckets, clearing out the oldest // as needed, before summing the faults: if (fi != null && fi.getFaultCount(now) < TRACKER_FAULT_THRESHOLD) { unBlacklistTracker(hostName, ReasonForBlackListing.EXCEEDING_FAILURES, true, now); } } } private void unBlacklistTracker(String hostName, ReasonForBlackListing rfb, boolean gray, long timeStamp) { FaultInfo fi = getFaultInfo(hostName, false); if (fi == null) { return; } Set<ReasonForBlackListing> rfbSet = fi.getReasonForBlacklisting(gray); boolean listed = gray? fi.isGraylisted() : fi.isBlacklisted(); if (listed && rfbSet.contains(rfb)) { if (fi.removeBlacklistedReason(rfb, gray)) { if (fi.getReasonForBlacklisting(gray).isEmpty()) { LOG.info("Un" + (gray? "gray" : "black") + "listing tracker : " + hostName); if (gray) { decrGraylistedTrackers(getNumTaskTrackersOnHost(hostName)); } else { addHostCapacity(hostName); } fi.unBlacklist(gray); // We have unblack/graylisted tracker, so tracker should definitely // be healthy. Check fault count; if zero, don't keep it in memory. if (fi.getFaultCount(timeStamp) == 0) { potentiallyFaultyTrackers.remove(hostName); } } } } } // Assumes JobTracker is locked on the entry private FaultInfo getFaultInfo(String hostName, boolean createIfNecessary) { FaultInfo fi = null; synchronized (potentiallyFaultyTrackers) { fi = potentiallyFaultyTrackers.get(hostName); if (fi == null && createIfNecessary) { fi = new FaultInfo(clock.getTime(), NUM_FAULT_BUCKETS, TRACKER_FAULT_BUCKET_WIDTH_MSECS); potentiallyFaultyTrackers.put(hostName, fi); } } return fi; } /** * Removes the tracker from the blacklist, graylist, and * potentially-faulty list, when it is restarted. * * Assumes JobTracker is locked on the entry. 
* * @param hostName */ void markTrackerHealthy(String hostName) { synchronized (potentiallyFaultyTrackers) { FaultInfo fi = potentiallyFaultyTrackers.remove(hostName); if (fi != null) { // a tracker can be both blacklisted and graylisted, so check both if (fi.isGraylisted()) { LOG.info("Marking " + hostName + " healthy from graylist"); decrGraylistedTrackers(getNumTaskTrackersOnHost(hostName)); } if (fi.isBlacklisted()) { LOG.info("Marking " + hostName + " healthy from blacklist"); addHostCapacity(hostName); } // no need for fi.unBlacklist() for either one: fi is already gone } } } private void removeHostCapacity(String hostName) { synchronized (taskTrackers) { // remove the capacity of trackers on this host int numTrackersOnHost = 0; for (TaskTrackerStatus status : getStatusesOnHost(hostName)) { int mapSlots = status.getMaxMapSlots(); totalMapTaskCapacity -= mapSlots; int reduceSlots = status.getMaxReduceSlots(); totalReduceTaskCapacity -= reduceSlots; ++numTrackersOnHost; getInstrumentation().addBlackListedMapSlots(mapSlots); getInstrumentation().addBlackListedReduceSlots(reduceSlots); } uniqueHostsMap.remove(hostName); incrBlacklistedTrackers(numTrackersOnHost); } } // This is called when a tracker is restarted or when the health-check // script reports it healthy. (Not called for graylisting.) 
private void addHostCapacity(String hostName) { synchronized (taskTrackers) { int numTrackersOnHost = 0; // add the capacity of trackers on the host for (TaskTrackerStatus status : getStatusesOnHost(hostName)) { int mapSlots = status.getMaxMapSlots(); totalMapTaskCapacity += mapSlots; int reduceSlots = status.getMaxReduceSlots(); totalReduceTaskCapacity += reduceSlots; numTrackersOnHost++; getInstrumentation().decBlackListedMapSlots(mapSlots); getInstrumentation().decBlackListedReduceSlots(reduceSlots); } uniqueHostsMap.put(hostName, numTrackersOnHost); decrBlacklistedTrackers(numTrackersOnHost); } } /** * Whether a host is blacklisted (by health-check script) across all jobs. * * Assumes JobTracker is locked on the entry. * * @param hostName - hostname to check * @return true if blacklisted */ boolean isBlacklisted(String hostName) { synchronized (potentiallyFaultyTrackers) { FaultInfo fi = null; if ((fi = potentiallyFaultyTrackers.get(hostName)) != null) { return fi.isBlacklisted(); } } return false; } /** * Whether a host is graylisted (by heuristics) "across all jobs". * * Assumes JobTracker is locked on the entry. * * @param hostName - hostname to check * @return true if graylisted */ boolean isGraylisted(String hostName) { synchronized (potentiallyFaultyTrackers) { FaultInfo fi = null; if ((fi = potentiallyFaultyTrackers.get(hostName)) != null) { return fi.isGraylisted(); } } return false; } // Assumes JobTracker is locked on the entry. int getFaultCount(String hostName) { synchronized (potentiallyFaultyTrackers) { FaultInfo fi = null; if ((fi = potentiallyFaultyTrackers.get(hostName)) != null) { return fi.getFaultCount(clock.getTime()); } } return 0; } // Assumes JobTracker is locked on the entry. void setNodeHealthStatus(String hostName, boolean isHealthy, String reason, long timeStamp) { FaultInfo fi = null; // If TaskTracker node is not healthy, get or create a fault info object // and blacklist it. 
(This path to blacklisting ultimately comes from // the health-check script called in NodeHealthCheckerService; see JIRA // MAPREDUCE-211 for details. We never use graylisting for this path.) if (!isHealthy) { fi = getFaultInfo(hostName, true); synchronized (potentiallyFaultyTrackers) { blacklistTracker(hostName, reason, ReasonForBlackListing.NODE_UNHEALTHY, false); } } else { if ((fi = getFaultInfo(hostName, false)) != null) { unBlacklistTracker(hostName, ReasonForBlackListing.NODE_UNHEALTHY, false, timeStamp); } } } } /** * Get all task tracker statuses on given host * * Assumes JobTracker is locked on the entry * @param hostName * @return {@link java.util.List} of {@link TaskTrackerStatus} */ private List<TaskTrackerStatus> getStatusesOnHost(String hostName) { List<TaskTrackerStatus> statuses = new ArrayList<TaskTrackerStatus>(); synchronized (taskTrackers) { for (TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); if (hostName.equals(status.getHost())) { statuses.add(status); } } } return statuses; } /** * Get total number of task trackers on given host * * Assumes JobTracker is locked on the entry * @param hostName * @return number of task trackers running on given host */ private int getNumTaskTrackersOnHost(String hostName) { int numTrackers = 0; synchronized (taskTrackers) { for (TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); if (hostName.equals(status.getHost())) { ++numTrackers; } } } return numTrackers; } /////////////////////////////////////////////////////// // Used to recover the jobs upon restart /////////////////////////////////////////////////////// class RecoveryManager { Set<JobID> jobsToRecover; // set of jobs to be recovered private int totalEventsRecovered = 0; private int restartCount = 0; private boolean shouldRecover = false; Set<String> recoveredTrackers = Collections.synchronizedSet(new HashSet<String>()); /** A custom listener that replays the events in the order in 
which the * events (task attempts) occurred. */ class JobRecoveryListener implements Listener { // The owner job private JobInProgress jip; private JobHistory.JobInfo job; // current job's info object // Maintain the count of the (attempt) events recovered private int numEventsRecovered = 0; // Maintains open transactions private Map<String, String> hangingAttempts = new HashMap<String, String>(); // Whether there are any updates for this job private boolean hasUpdates = false; public JobRecoveryListener(JobInProgress jip) { this.jip = jip; this.job = new JobHistory.JobInfo(jip.getJobID().toString()); } /** * Process a task. Note that a task might commit a previously pending * transaction. */ private void processTask(String taskId, JobHistory.Task task) { // Any TASK info commits the previous transaction boolean hasHanging = hangingAttempts.remove(taskId) != null; if (hasHanging) { numEventsRecovered += 2; } TaskID id = TaskID.forName(taskId); TaskInProgress tip = getTip(id); updateTip(tip, task); } /** * Adds a task-attempt in the listener */ private void processTaskAttempt(String taskAttemptId, JobHistory.TaskAttempt attempt) throws UnknownHostException { TaskAttemptID id = TaskAttemptID.forName(taskAttemptId); // Check if the transaction for this attempt can be committed String taskStatus = attempt.get(Keys.TASK_STATUS); TaskAttemptID taskID = TaskAttemptID.forName(taskAttemptId); JobInProgress jip = getJob(taskID.getJobID()); JobStatus prevStatus = (JobStatus)jip.getStatus().clone(); if (taskStatus.length() > 0) { // This means this is an update event if (taskStatus.equals(Values.SUCCESS.name())) { // Mark this attempt as hanging hangingAttempts.put(id.getTaskID().toString(), taskAttemptId); addSuccessfulAttempt(jip, id, attempt); } else { addUnsuccessfulAttempt(jip, id, attempt); numEventsRecovered += 2; } } else { createTaskAttempt(jip, id, attempt); } JobStatus newStatus = (JobStatus)jip.getStatus().clone(); if (prevStatus.getRunState() != 
newStatus.getRunState()) { if(LOG.isDebugEnabled()) LOG.debug("Status changed hence informing prevStatus" + prevStatus + " currentStatus "+ newStatus); JobStatusChangeEvent event = new JobStatusChangeEvent(jip, EventType.RUN_STATE_CHANGED, prevStatus, newStatus); updateJobInProgressListeners(event); } } public void handle(JobHistory.RecordTypes recType, Map<Keys, String> values) throws IOException { if (recType == JobHistory.RecordTypes.Job) { // Update the meta-level job information job.handle(values); // Forcefully init the job as we have some updates for it checkAndInit(); } else if (recType.equals(JobHistory.RecordTypes.Task)) { String taskId = values.get(Keys.TASKID); // Create a task JobHistory.Task task = new JobHistory.Task(); task.handle(values); // Ignore if its a cleanup task if (isCleanup(task)) { return; } // Process the task i.e update the tip state processTask(taskId, task); } else if (recType.equals(JobHistory.RecordTypes.MapAttempt)) { String attemptId = values.get(Keys.TASK_ATTEMPT_ID); // Create a task attempt JobHistory.MapAttempt attempt = new JobHistory.MapAttempt(); attempt.handle(values); // Ignore if its a cleanup task if (isCleanup(attempt)) { return; } // Process the attempt i.e update the attempt state via job processTaskAttempt(attemptId, attempt); } else if (recType.equals(JobHistory.RecordTypes.ReduceAttempt)) { String attemptId = values.get(Keys.TASK_ATTEMPT_ID); // Create a task attempt JobHistory.ReduceAttempt attempt = new JobHistory.ReduceAttempt(); attempt.handle(values); // Ignore if its a cleanup task if (isCleanup(attempt)) { return; } // Process the attempt i.e update the job state via job processTaskAttempt(attemptId, attempt); } } // Check if the task is of type CLEANUP private boolean isCleanup(JobHistory.Task task) { String taskType = task.get(Keys.TASK_TYPE); return Values.CLEANUP.name().equals(taskType); } // Init the job if its ready for init. 
Also make sure that the scheduler // is updated private void checkAndInit() throws IOException { String jobStatus = this.job.get(Keys.JOB_STATUS); if (Values.PREP.name().equals(jobStatus)) { hasUpdates = true; LOG.info("Calling init from RM for job " + jip.getJobID().toString()); try { initJob(jip); } catch (Throwable t) { LOG.error("Job initialization failed : \n" + StringUtils.stringifyException(t)); jip.status.setFailureInfo("Job Initialization failed: \n" + StringUtils.stringifyException(t)); failJob(jip); throw new IOException(t); } } } void close() { if (hasUpdates) { // Apply the final (job-level) updates JobStatusChangeEvent event = updateJob(jip, job); synchronized (JobTracker.this) { // Update the job listeners updateJobInProgressListeners(event); } } } public int getNumEventsRecovered() { return numEventsRecovered; } } public RecoveryManager() { jobsToRecover = new TreeSet<JobID>(); } public boolean contains(JobID id) { return jobsToRecover.contains(id); } void addJobForRecovery(JobID id) { jobsToRecover.add(id); } public boolean shouldRecover() { return shouldRecover; } public boolean shouldSchedule() { return recoveredTrackers.isEmpty(); } private void markTracker(String trackerName) { recoveredTrackers.add(trackerName); } void unMarkTracker(String trackerName) { recoveredTrackers.remove(trackerName); } Set<JobID> getJobsToRecover() { return jobsToRecover; } /** Check if the given string represents a job-id or not */ private boolean isJobNameValid(String str) { if(str == null) { return false; } String[] parts = str.split("_"); if(parts.length == 3) { if(parts[0].equals("job")) { // other 2 parts should be parseable return JobTracker.validateIdentifier(parts[1]) && JobTracker.validateJobNumber(parts[2]); } } return false; } // checks if the job dir has the required files public void checkAndAddJob(FileStatus status) throws IOException { String fileName = status.getPath().getName(); if (isJobNameValid(fileName)) { if 
(JobClient.isJobDirValid(status.getPath(), fs)) { recoveryManager.addJobForRecovery(JobID.forName(fileName)); shouldRecover = true; // enable actual recovery if num-files > 1 } else { LOG.info("Found an incomplete job directory " + fileName + "." + " Deleting it!!"); fs.delete(status.getPath(), true); } } } private JobStatusChangeEvent updateJob(JobInProgress jip, JobHistory.JobInfo job) { // Change the job priority String jobpriority = job.get(Keys.JOB_PRIORITY); JobPriority priority = JobPriority.valueOf(jobpriority); // It's important to update this via the jobtracker's api as it will // take care of updating the event listeners too try { setJobPriority(jip.getJobID(), priority); } catch (IOException e) { // This will not happen. JobTracker can set jobPriority of any job // as mrOwner has the needed permissions. LOG.warn("Unexpected. JobTracker could not do SetJobPriority on " + jip.getJobID() + ". " + e); } // Save the previous job status JobStatus oldStatus = (JobStatus)jip.getStatus().clone(); // Set the start/launch time only if there are recovered tasks // Increment the job's restart count jip.updateJobInfo(job.getLong(JobHistory.Keys.SUBMIT_TIME), job.getLong(JobHistory.Keys.LAUNCH_TIME)); // Save the new job status JobStatus newStatus = (JobStatus)jip.getStatus().clone(); return new JobStatusChangeEvent(jip, EventType.START_TIME_CHANGED, oldStatus, newStatus); } private void updateTip(TaskInProgress tip, JobHistory.Task task) { long startTime = task.getLong(Keys.START_TIME); if (startTime != 0) { tip.setExecStartTime(startTime); } long finishTime = task.getLong(Keys.FINISH_TIME); // For failed tasks finish-time will be missing if (finishTime != 0) { tip.setExecFinishTime(finishTime); } String cause = task.get(Keys.TASK_ATTEMPT_ID); if (cause.length() > 0) { // This means that the this is a FAILED events TaskAttemptID id = TaskAttemptID.forName(cause); TaskStatus status = tip.getTaskStatus(id); synchronized (JobTracker.this) { // This will add the tip 
failed event in the new log tip.getJob().failedTask(tip, id, status.getDiagnosticInfo(), status.getPhase(), status.getRunState(), status.getTaskTracker()); } } } private void createTaskAttempt(JobInProgress job, TaskAttemptID attemptId, JobHistory.TaskAttempt attempt) throws UnknownHostException { TaskID id = attemptId.getTaskID(); String type = attempt.get(Keys.TASK_TYPE); TaskInProgress tip = job.getTaskInProgress(id); // I. Get the required info TaskStatus taskStatus = null; String trackerName = attempt.get(Keys.TRACKER_NAME); String trackerHostName = JobInProgress.convertTrackerNameToHostName(trackerName); // recover the port information. int port = 0; // default to 0 String hport = attempt.get(Keys.HTTP_PORT); if (hport != null && hport.length() > 0) { port = attempt.getInt(Keys.HTTP_PORT); } long attemptStartTime = attempt.getLong(Keys.START_TIME); // II. Create the (appropriate) task status if (type.equals(Values.MAP.name())) { taskStatus = new MapTaskStatus(attemptId, 0.0f, job.getNumSlotsPerTask(TaskType.MAP), TaskStatus.State.RUNNING, "", "", trackerName, TaskStatus.Phase.MAP, new Counters()); } else { taskStatus = new ReduceTaskStatus(attemptId, 0.0f, job.getNumSlotsPerTask(TaskType.REDUCE), TaskStatus.State.RUNNING, "", "", trackerName, TaskStatus.Phase.REDUCE, new Counters()); } // Set the start time taskStatus.setStartTime(attemptStartTime); List<TaskStatus> ttStatusList = new ArrayList<TaskStatus>(); ttStatusList.add(taskStatus); // III. Create the dummy tasktracker status //TaskTrackerStatus ttStatus = // new TaskTrackerStatus(trackerName, trackerHostName, port, ttStatusList, // 0 , 0, 0); TaskTrackerStatus ttStatus = new TaskTrackerStatus(trackerName, trackerHostName, port, ttStatusList, 0, 0, 0, 0); ttStatus.setLastSeen(clock.getTime()); synchronized (JobTracker.this) { synchronized (taskTrackers) { synchronized (trackerExpiryQueue) { // IV. 
Register a new tracker TaskTracker taskTracker = getTaskTracker(trackerName); boolean isTrackerRegistered = (taskTracker != null); if (!isTrackerRegistered) { markTracker(trackerName); // add the tracker to recovery-manager taskTracker = new TaskTracker(trackerName); taskTracker.setStatus(ttStatus); addNewTracker(taskTracker); } // V. Update the tracker status // This will update the meta info of the jobtracker and also add the // tracker status if missing i.e register it updateTaskTrackerStatus(trackerName, ttStatus); } } // Register the attempt with job and tip, under JobTracker lock. // Since, as of today they are atomic through heartbeat. // VI. Register the attempt // a) In the job job.addRunningTaskToTIP(tip, attemptId, ttStatus, false); // b) In the tip tip.updateStatus(taskStatus); } // VII. Make an entry in the launched tasks expireLaunchingTasks.addNewTask(attemptId); } private void addSuccessfulAttempt(JobInProgress job, TaskAttemptID attemptId, JobHistory.TaskAttempt attempt) { // I. 
Get the required info TaskID taskId = attemptId.getTaskID(); String type = attempt.get(Keys.TASK_TYPE); TaskInProgress tip = job.getTaskInProgress(taskId); long attemptFinishTime = attempt.getLong(Keys.FINISH_TIME); // Get the task status and the tracker name and make a copy of it TaskStatus taskStatus = (TaskStatus)tip.getTaskStatus(attemptId).clone(); taskStatus.setFinishTime(attemptFinishTime); String stateString = attempt.get(Keys.STATE_STRING); // Update the basic values taskStatus.setStateString(stateString); taskStatus.setProgress(1.0f); taskStatus.setRunState(TaskStatus.State.SUCCEEDED); // Set the shuffle/sort finished times if (type.equals(Values.REDUCE.name())) { long shuffleTime = Long.parseLong(attempt.get(Keys.SHUFFLE_FINISHED)); long sortTime = Long.parseLong(attempt.get(Keys.SORT_FINISHED)); taskStatus.setShuffleFinishTime(shuffleTime); taskStatus.setSortFinishTime(sortTime); } // Add the counters String counterString = attempt.get(Keys.COUNTERS); Counters counter = null; //TODO Check if an exception should be thrown try { counter = Counters.fromEscapedCompactString(counterString); } catch (ParseException pe) { counter = new Counters(); // Set it to empty counter } taskStatus.setCounters(counter); synchronized (JobTracker.this) { // II. Replay the status job.updateTaskStatus(tip, taskStatus); } // III. Prevent the task from expiry expireLaunchingTasks.removeTask(attemptId); } private void addUnsuccessfulAttempt(JobInProgress job, TaskAttemptID attemptId, JobHistory.TaskAttempt attempt) { // I. 
Get the required info TaskID taskId = attemptId.getTaskID(); TaskInProgress tip = job.getTaskInProgress(taskId); long attemptFinishTime = attempt.getLong(Keys.FINISH_TIME); TaskStatus taskStatus = (TaskStatus)tip.getTaskStatus(attemptId).clone(); taskStatus.setFinishTime(attemptFinishTime); // Reset the progress taskStatus.setProgress(0.0f); String stateString = attempt.get(Keys.STATE_STRING); taskStatus.setStateString(stateString); boolean hasFailed = attempt.get(Keys.TASK_STATUS).equals(Values.FAILED.name()); // Set the state failed/killed if (hasFailed) { taskStatus.setRunState(TaskStatus.State.FAILED); } else { taskStatus.setRunState(TaskStatus.State.KILLED); } // Get/Set the error msg String diagInfo = attempt.get(Keys.ERROR); taskStatus.setDiagnosticInfo(diagInfo); // diag info synchronized (JobTracker.this) { // II. Update the task status job.updateTaskStatus(tip, taskStatus); } // III. Prevent the task from expiry expireLaunchingTasks.removeTask(attemptId); } Path getRestartCountFile() { return new Path(getSystemDir(), "jobtracker.info"); } Path getTempRestartCountFile() { return new Path(getSystemDir(), "jobtracker.info.recover"); } /** * Initialize the recovery process. It simply creates a jobtracker.info file * in the jobtracker's system directory and writes its restart count in it. * For the first start, the jobtracker writes '0' in it. Upon subsequent * restarts the jobtracker replaces the count with its current count which * is (old count + 1). The whole purpose of this api is to obtain restart * counts across restarts to avoid attempt-id clashes. * * Note that in between if the jobtracker.info files goes missing then the * jobtracker will disable recovery and continue. 
*
     */
    void updateRestartCount() throws IOException {
      Path restartFile = getRestartCountFile();
      Path tmpRestartFile = getTempRestartCountFile();
      FsPermission filePerm = new FsPermission(SYSTEM_FILE_PERMISSION);

      // make sure the main info file is the one holding the old count
      if (fs.exists(restartFile)) {
        fs.delete(tmpRestartFile, false); // delete the tmp file
      } else if (fs.exists(tmpRestartFile)) {
        // only the .recover file exists: promote it to the main file
        fs.rename(tmpRestartFile, restartFile); // rename .rec to main file
      } else {
        // For the very first time the jobtracker will create a jobtracker.info
        // file. If the jobtracker has restarted then disable recovery as files'
        // needed for recovery are missing.

        // disable recovery if this is a restart
        shouldRecover = false;

        // write the jobtracker.info file
        try {
          writeRestartCount(restartFile, filePerm, 0);
        } catch (IOException ioe) {
          LOG.warn("Writing to file " + restartFile + " failed!");
          LOG.warn("FileSystem is not ready yet!");
          fs.delete(restartFile, false);
          throw ioe;
        }
        return;
      }

      FSDataInputStream in = fs.open(restartFile);
      try {
        // read the old count
        restartCount = in.readInt();
        ++restartCount; // increment the restart count
      } catch (IOException ioe) {
        LOG.warn("System directory is garbled. Failed to read file "
                 + restartFile);
        LOG.warn("Jobtracker recovery is not possible with garbled"
                 + " system directory! Please delete the system directory and"
                 + " restart the jobtracker. Note that deleting the system"
                 + " directory will result in loss of all the running jobs.");
        throw new RuntimeException(ioe);
      } finally {
        in.close();
      }

      // Write back the new restart count and rename the old info file
      //TODO This is similar to jobhistory recovery, maybe this common code
      //      can be factored out.

      // write to the tmp file
      writeRestartCount(tmpRestartFile, filePerm, restartCount);

      // delete the main file
      fs.delete(restartFile, false);

      // rename the .rec to main file
      fs.rename(tmpRestartFile, restartFile);
    }

    // Writes a restart count to the given file, closing the stream even
    // when the write itself fails.
    private void writeRestartCount(Path file, FsPermission perm, int count)
      throws IOException {
      FSDataOutputStream out = FileSystem.create(fs, file, perm);
      try {
        out.writeInt(count);
      } finally {
        out.close();
      }
    }

    /**
     * Recovers the jobs queued in jobsToRecover, in three phases:
     * (I) re-create each job from its locally stored owner file, check
     * submit access, locate and repair its history file and submit the job
     * to the jobtracker; (II) replay each job's history file through a
     * JobRecoveryListener and checkpoint the history; (III) refresh the
     * last-seen time of every tracker in the expiry queue.
     *
     * A job that fails in phase I is dropped from the recovery list, its
     * owner file is deleted and the job, if already created, is failed.
     * When recovery is disabled the recovery list is simply cleared.
     */
    // mapred.JobID::forName returns
    @SuppressWarnings("unchecked") // mapreduce.JobID
    public void recover() {
      if (!shouldRecover()) {
        // clean up jobs structure
        jobsToRecover.clear();
        return;
      }

      LOG.info("Restart count of the jobtracker : " + restartCount);

      // I. Init the jobs and cache the recovered job history filenames
      Map<JobID, Path> jobHistoryFilenameMap = new HashMap<JobID, Path>();
      Iterator<JobID> idIter = jobsToRecover.iterator();
      JobInProgress job = null;
      File jobIdFile = null;

      // 0. Cleanup
      try {
        JobHistory.JobInfo.deleteConfFiles();
      } catch (IOException ioe) {
        LOG.info("Error in cleaning up job history folder", ioe);
      }

      while (idIter.hasNext()) {
        JobID id = idIter.next();
        LOG.info("Trying to recover details of job " + id);
        // Start every iteration clean, so that the failure handling below
        // never touches the job or owner file of a previous iteration.
        job = null;
        jobIdFile = null;
        // Iterator.remove() may be called only once per element
        boolean removedFromRecovery = false;
        try {
          // 1. Recover job owner and create JIP
          jobIdFile =
            new File(lDirAlloc.getLocalPathToRead(SUBDIR + "/" + id, conf).toString());

          String user = null;
          if (jobIdFile != null && jobIdFile.exists()) {
            LOG.info("File " + jobIdFile + " exists for job " + id);
            FileInputStream in = new FileInputStream(jobIdFile);
            BufferedReader reader = null;
            try {
              reader = new BufferedReader(new InputStreamReader(in));
              user = reader.readLine();
              LOG.info("Recovered user " + user + " for job " + id);
            } finally {
              if (reader != null) {
                reader.close();
              }
              in.close();
            }
          }
          if (user == null) {
            throw new RuntimeException("Incomplete job " + id);
          }

          // Create the job
          /* THIS PART OF THE CODE IS USELESS. JOB RECOVERY SHOULD BE
           * BACKPORTED (MAPREDUCE-873)
           */
          job = new JobInProgress(JobTracker.this, conf,
              new JobInfo((org.apache.hadoop.mapreduce.JobID) id,
                  new Text(user), new Path(getStagingAreaDirInternal(user))),
              restartCount, new Credentials() /*HACK*/);

          // 2. Check if the user has appropriate access
          //    Get the user group info for the job's owner
          UserGroupInformation ugi =
            UserGroupInformation.createRemoteUser(job.getJobConf().getUser());
          LOG.info("Submitting job " + id + " on behalf of user "
                   + ugi.getShortUserName() + " in groups : "
                   + StringUtils.arrayToString(ugi.getGroupNames()));

          //    check the access
          try {
            aclsManager.checkAccess(job, ugi, Operation.SUBMIT_JOB);
          } catch (Throwable t) {
            LOG.warn("Access denied for user " + ugi.getShortUserName()
                     + " in groups : ["
                     + StringUtils.arrayToString(ugi.getGroupNames()) + "]");
            throw t;
          }

          // 3. Get the log file and the file path
          String logFileName =
            JobHistory.JobInfo.getJobHistoryFileName(job.getJobConf(), id);
          if (logFileName != null) {
            Path jobHistoryFilePath =
              JobHistory.JobInfo.getJobHistoryLogLocation(logFileName);

            // 4. Recover the history file. This involved
            //     - deleting file.recover if file exists
            //     - renaming file.recover to file if file doesnt exist
            // This makes sure that the (master) file exists
            JobHistory.JobInfo.recoverJobHistoryFile(job.getJobConf(),
                                                     jobHistoryFilePath);

            // 5. Cache the history file name as it costs one dfs access
            jobHistoryFilenameMap.put(job.getJobID(), jobHistoryFilePath);
          } else {
            LOG.info("No history file found for job " + id);
            idIter.remove(); // remove from recovery list
            removedFromRecovery = true;
          }

          // 6. Sumbit the job to the jobtracker
          addJob(id, job);
        } catch (Throwable t) {
          LOG.warn("Failed to recover job " + id + " Ignoring the job.", t);
          if (!removedFromRecovery) {
            idIter.remove();
          }
          if (jobIdFile != null) {
            jobIdFile.delete();
            jobIdFile = null;
          }
          if (job != null) {
            job.fail();
            job = null;
          }
          continue;
        }
      }

      long recoveryStartTime = clock.getTime();

      // II. Recover each job
      idIter = jobsToRecover.iterator();
      while (idIter.hasNext()) {
        JobID id = idIter.next();
        JobInProgress pJob = getJob(id);

        // 1. Get the required info
        // Get the recovered history file
        Path jobHistoryFilePath = jobHistoryFilenameMap.get(pJob.getJobID());
        String logFileName = jobHistoryFilePath.getName();

        // file system of the history file (named so that it does not
        // shadow the jobtracker's own fs)
        FileSystem historyFs;
        try {
          historyFs = jobHistoryFilePath.getFileSystem(conf);
        } catch (IOException ioe) {
          LOG.warn("Failed to get the filesystem for job " + id + ". Ignoring.",
                   ioe);
          continue;
        }

        // 2. Parse the history file
        // Note that this also involves job update
        JobRecoveryListener listener = new JobRecoveryListener(pJob);
        try {
          JobHistory.parseHistoryFromFS(jobHistoryFilePath.toString(),
                                        listener, historyFs);
        } catch (Throwable t) {
          LOG.info("Error reading history file of job " + pJob.getJobID()
                   + ". Ignoring the error and continuing.", t);
        }

        // 3. Close the listener
        listener.close();

        // 4. Update the recovery metric
        totalEventsRecovered += listener.getNumEventsRecovered();

        // 5. Cleanup history
        // Delete the master log file as an indication that the new file
        // should be used in future
        try {
          synchronized (pJob) {
            JobHistory.JobInfo.checkpointRecovery(logFileName,
                                                  pJob.getJobConf());
          }
        } catch (Throwable t) {
          LOG.warn("Failed to delete log file (" + logFileName + ") for job "
                   + id + ". Continuing.", t);
        }

        if (pJob.isComplete()) {
          idIter.remove(); // no need to keep this job info as its successful
        }
      }

      recoveryDuration = clock.getTime() - recoveryStartTime;
      hasRecovered = true;

      // III. Finalize the recovery
      synchronized (trackerExpiryQueue) {
        // Make sure that the tracker statuses in the expiry-tracker queue
        // are updated
        long now = clock.getTime();
        int size = trackerExpiryQueue.size();
        for (int i = 0; i < size ; ++i) {
          // Get the first tasktracker
          TaskTrackerStatus taskTracker = trackerExpiryQueue.first();

          // Remove it
          trackerExpiryQueue.remove(taskTracker);

          // Set the new time
          taskTracker.setLastSeen(now);

          // Add back to get the sorted list
          trackerExpiryQueue.add(taskTracker);
        }
      }

      LOG.info("Restoration complete");
    }

    /** @return total number of attempt events recovered across all jobs */
    int totalEventsRecovered() {
      return totalEventsRecovered;
    }
  }

  private JobTrackerInstrumentation myInstrumentation;

  /**
   * Creates the instrumentation object: the configured class is
   * instantiated reflectively through its (JobTracker, JobConf)
   * constructor; when no class is configured, or reflection fails, the
   * default from JobTrackerInstrumentation.create() is used.
   */
  private void createInstrumentation() {
    // Initialize instrumentation
    JobTrackerInstrumentation tmp;
    Class<? extends JobTrackerInstrumentation> metricsInst =
        getInstrumentationClass(conf);
    LOG.debug("instrumentation class="+ metricsInst);
    if (metricsInst == null) {
      myInstrumentation = JobTrackerInstrumentation.create(this, conf);
      return;
    }
    try {
      java.lang.reflect.Constructor<? extends JobTrackerInstrumentation> c =
          metricsInst.getConstructor(new Class<?>[]{JobTracker.class,
              JobConf.class});
      tmp = c.newInstance(this, conf);
    } catch (Exception e) {
      //Reflection can throw lots of exceptions -- handle them all by
      //falling back on the default.
      LOG.error("failed to initialize job tracker metrics", e);
      tmp = JobTrackerInstrumentation.create(this, conf);
    }
    myInstrumentation = tmp;
  }

  /////////////////////////////////////////////////////////////////
  // The real JobTracker
  ////////////////////////////////////////////////////////////////
  int port;
  String localMachine;
  private String trackerIdentifier;
  long startTime;
  int totalSubmissions = 0;
  private int totalMapTaskCapacity;
  private int totalReduceTaskCapacity;
  private HostsFileReader hostsReader;

  // JobTracker recovery variables
  private volatile boolean hasRestarted = false;
  private volatile boolean hasRecovered = false;
  private volatile long recoveryDuration;

  //
  // Properties to maintain while running Jobs and Tasks:
  //
  // 1.  Each Task is always contained in a single Job.  A Job succeeds when all its
  //     Tasks are complete.
  //
  // 2.  Every running or successful Task is assigned to a Tracker.  Idle Tasks are not.
  //
  // 3.  When a Tracker fails, all of its assigned Tasks are marked as failures.
  //
  // 4.  A Task might need to be reexecuted if it (or the machine it's hosted on) fails
  //     before the Job is 100% complete.  Sometimes an upstream Task can fail without
  //     reexecution if all downstream Tasks that require its output have already obtained
  //     the necessary files.
  //

  // All the known jobs.
// (jobid->JobInProgress)
  // Wrapped with Collections.synchronizedMap: single calls are thread-safe,
  // but code that iterates over its views still has to hold the map's
  // monitor itself (getRunningJobs() does this with synchronized (jobs)).
  Map<JobID, JobInProgress> jobs =
    Collections.synchronizedMap(new TreeMap<JobID, JobInProgress>());

  // (user -> list of JobInProgress)
  TreeMap<String, ArrayList<JobInProgress>> userToJobsMap =
    new TreeMap<String, ArrayList<JobInProgress>>();

  // (trackerID --> list of jobs to cleanup)
  Map<String, Set<JobID>> trackerToJobsToCleanup =
    new HashMap<String, Set<JobID>>();

  // (trackerID --> list of tasks to cleanup)
  Map<String, Set<TaskAttemptID>> trackerToTasksToCleanup =
    new HashMap<String, Set<TaskAttemptID>>();

  // All the known TaskInProgress items, mapped to by taskids (taskid->TIP)
  Map<TaskAttemptID, TaskInProgress> taskidToTIPMap =
    new TreeMap<TaskAttemptID, TaskInProgress>();

  // This is used to keep track of all trackers running on one host. While
  // decommissioning the host, all the trackers on the host will be lost.
  Map<String, Set<TaskTracker>> hostnameToTaskTracker =
    Collections.synchronizedMap(new TreeMap<String, Set<TaskTracker>>());

  // (taskid --> trackerID)
  TreeMap<TaskAttemptID, String> taskidToTrackerMap = new TreeMap<TaskAttemptID, String>();

  // (trackerID->TreeSet of taskids running at that tracker)
  TreeMap<String, Set<TaskAttemptID>> trackerToTaskMap =
    new TreeMap<String, Set<TaskAttemptID>>();

  // (trackerID -> TreeSet of completed taskids running at that tracker)
  TreeMap<String, Set<TaskAttemptID>> trackerToMarkedTasksMap =
    new TreeMap<String, Set<TaskAttemptID>>();

  // (trackerID --> last sent HeartBeatResponse)
  Map<String, HeartbeatResponse> trackerToHeartbeatResponseMap =
    new TreeMap<String, HeartbeatResponse>();

  // (hostname --> Node (NetworkTopology))
  Map<String, Node> hostnameToNodeMap =
    Collections.synchronizedMap(new TreeMap<String, Node>());

  // Number of resolved entries
  int numResolved;

  // statistics about TaskTrackers with faults; may lead to graylisting
  private FaultyTrackersInfo faultyTrackers = new FaultyTrackersInfo();

  private JobTrackerStatistics statistics =
    new JobTrackerStatistics();

  //
  // Watch and expire
// TaskTracker objects using these structures.
  // We can map from Name->TaskTrackerStatus, or we can expire by time.
  //
  int totalMaps = 0;
  int totalReduces = 0;
  private int occupiedMapSlots = 0;
  private int occupiedReduceSlots = 0;
  private int reservedMapSlots = 0;
  private int reservedReduceSlots = 0;

  // Plain HashMap: the accessors in this class that read it
  // (taskTrackers(), activeTaskTrackers(), taskTrackerNames()) do so inside
  // synchronized (taskTrackers).
  private HashMap<String, TaskTracker> taskTrackers =
    new HashMap<String, TaskTracker>();
  Map<String,Integer>uniqueHostsMap = new ConcurrentHashMap<String, Integer>();

  // Housekeeping runnables and the threads that drive them. The threads that
  // start out null are created and started in offerService();
  // expireLaunchingTaskThread is created here and only started there.
  ExpireTrackers expireTrackers = new ExpireTrackers();
  Thread expireTrackersThread = null;
  RetireJobs retireJobs = new RetireJobs();
  Thread retireJobsThread = null;
  final int retiredJobsCacheSize;
  ExpireLaunchingTasks expireLaunchingTasks = new ExpireLaunchingTasks();
  Thread expireLaunchingTaskThread = new Thread(expireLaunchingTasks,
                                                "expireLaunchingTasks");

  CompletedJobStatusStore completedJobStatusStore = null;
  Thread completedJobsStoreThread = null;
  RecoveryManager recoveryManager;
  JobHistoryServer jobHistoryServer;

  /**
   * It might seem like a bug to maintain a TreeSet of tasktracker objects,
   * which can be updated at any time. But that's not what happens! We
   * only update status objects in the taskTrackers table. Status objects
   * are never updated once they enter the expiry queue. Instead, we wait
   * for them to expire and remove them from the expiry queue. If a status
   * object has been updated in the taskTracker table, the latest status is
   * reinserted. Otherwise, we assume the tracker has expired.
*/ TreeSet<TaskTrackerStatus> trackerExpiryQueue = new TreeSet<TaskTrackerStatus>( new Comparator<TaskTrackerStatus>() { public int compare(TaskTrackerStatus p1, TaskTrackerStatus p2) { if (p1.getLastSeen() < p2.getLastSeen()) { return -1; } else if (p1.getLastSeen() > p2.getLastSeen()) { return 1; } else { return (p1.getTrackerName().compareTo(p2.getTrackerName())); } } } ); // Used to provide an HTML view on Job, Task, and TaskTracker structures final HttpServer infoServer; int infoPort; Server interTrackerServer; // Some jobs are stored in a local system directory. We can delete // the files when we're done with the job. static final String SUBDIR = "jobTracker"; final LocalFileSystem localFs; FileSystem fs = null; Path systemDir = null; JobConf conf; private final ACLsManager aclsManager; long limitMaxMemForMapTasks; long limitMaxMemForReduceTasks; long memSizeForMapSlotOnJT; long memSizeForReduceSlotOnJT; private QueueManager queueManager; /** * Start the JobTracker process, listen on the indicated port */ JobTracker(JobConf conf) throws IOException, InterruptedException { this(conf, generateNewIdentifier()); } JobTracker(JobConf conf, QueueManager qm) throws IOException, InterruptedException { this(conf, generateNewIdentifier(), new Clock(), qm); } JobTracker(JobConf conf, Clock clock) throws IOException, InterruptedException { this(conf, generateNewIdentifier(), clock); } public static final String JT_USER_NAME = "mapreduce.jobtracker.kerberos.principal"; public static final String JT_KEYTAB_FILE = "mapreduce.jobtracker.keytab.file"; JobTracker(final JobConf conf, String identifier) throws IOException, InterruptedException { this(conf, identifier, new Clock()); } JobTracker(final JobConf conf, String identifier, Clock clock) throws IOException, InterruptedException { this(conf, identifier, clock, new QueueManager(new Configuration(conf))); } JobTracker(final JobConf conf, String identifier, Clock clock, QueueManager qm) throws IOException, 
InterruptedException { this.queueManager = qm; this.clock = clock; // Set ports, start RPC servers, setup security policy etc. InetSocketAddress addr = getAddress(conf); this.localMachine = addr.getHostName(); this.port = addr.getPort(); // find the owner of the process // get the desired principal to load UserGroupInformation.setConfiguration(conf); SecurityUtil.login(conf, JT_KEYTAB_FILE, JT_USER_NAME, localMachine); long secretKeyInterval = conf.getLong(DELEGATION_KEY_UPDATE_INTERVAL_KEY, DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT); long tokenMaxLifetime = conf.getLong(DELEGATION_TOKEN_MAX_LIFETIME_KEY, DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT); long tokenRenewInterval = conf.getLong(DELEGATION_TOKEN_RENEW_INTERVAL_KEY, DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT); secretManager = new DelegationTokenSecretManager(secretKeyInterval, tokenMaxLifetime, tokenRenewInterval, DELEGATION_TOKEN_GC_INTERVAL); secretManager.startThreads(); MAX_JOBCONF_SIZE = conf.getLong(MAX_USER_JOBCONF_SIZE_KEY, MAX_JOBCONF_SIZE); // // Grab some static constants // TASKTRACKER_EXPIRY_INTERVAL = conf.getLong("mapred.tasktracker.expiry.interval", 10 * 60 * 1000); RETIRE_JOB_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.interval", 24 * 60 * 60 * 1000); RETIRE_JOB_CHECK_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.check", 60 * 1000); retiredJobsCacheSize = conf.getInt("mapred.job.tracker.retiredjobs.cache.size", 1000); MAX_COMPLETE_USER_JOBS_IN_MEMORY = conf.getInt("mapred.jobtracker.completeuserjobs.maximum", 100); // values related to heuristic graylisting (a "fault" is a per-job // blacklisting; too many faults => node is graylisted across all jobs): TRACKER_FAULT_TIMEOUT_WINDOW = // 3 hours conf.getInt("mapred.jobtracker.blacklist.fault-timeout-window", 3 * 60); TRACKER_FAULT_BUCKET_WIDTH = // 15 minutes conf.getInt("mapred.jobtracker.blacklist.fault-bucket-width", 15); TRACKER_FAULT_THRESHOLD = conf.getInt("mapred.max.tracker.blacklists", 4); // future: rename to 
"mapred.jobtracker.blacklist.fault-threshold" for // namespace consistency if (TRACKER_FAULT_BUCKET_WIDTH > TRACKER_FAULT_TIMEOUT_WINDOW) { TRACKER_FAULT_BUCKET_WIDTH = TRACKER_FAULT_TIMEOUT_WINDOW; } TRACKER_FAULT_BUCKET_WIDTH_MSECS = (long)TRACKER_FAULT_BUCKET_WIDTH * 60 * 1000; // ideally, TRACKER_FAULT_TIMEOUT_WINDOW should be an integral multiple of // TRACKER_FAULT_BUCKET_WIDTH, but round up just in case: NUM_FAULT_BUCKETS = (TRACKER_FAULT_TIMEOUT_WINDOW + TRACKER_FAULT_BUCKET_WIDTH - 1) / TRACKER_FAULT_BUCKET_WIDTH; NUM_HEARTBEATS_IN_SECOND = conf.getInt(JT_HEARTBEATS_IN_SECOND, DEFAULT_NUM_HEARTBEATS_IN_SECOND); if (NUM_HEARTBEATS_IN_SECOND < MIN_NUM_HEARTBEATS_IN_SECOND) { NUM_HEARTBEATS_IN_SECOND = DEFAULT_NUM_HEARTBEATS_IN_SECOND; } HEARTBEATS_SCALING_FACTOR = conf.getFloat(JT_HEARTBEATS_SCALING_FACTOR, DEFAULT_HEARTBEATS_SCALING_FACTOR); if (HEARTBEATS_SCALING_FACTOR < MIN_HEARTBEATS_SCALING_FACTOR) { HEARTBEATS_SCALING_FACTOR = DEFAULT_HEARTBEATS_SCALING_FACTOR; } // This configuration is there solely for tuning purposes and // once this feature has been tested in real clusters and an appropriate // value for the threshold has been found, this config might be taken out. AVERAGE_BLACKLIST_THRESHOLD = conf.getFloat("mapred.cluster.average.blacklist.threshold", 0.5f); // This is a directory of temporary submission files. We delete it // on startup, and can delete any files that we're done with this.conf = conf; JobConf jobConf = new JobConf(conf); initializeTaskMemoryRelatedConfig(); // Read the hosts/exclude files to restrict access to the jobtracker. this.hostsReader = new HostsFileReader(conf.get("mapred.hosts", ""), conf.get("mapred.hosts.exclude", "")); aclsManager = new ACLsManager(conf, new JobACLsManager(conf), queueManager); LOG.info("Starting jobtracker with owner as " + getMROwner().getShortUserName()); // Create the scheduler Class<? 
extends TaskScheduler> schedulerClass = conf.getClass("mapred.jobtracker.taskScheduler", JobQueueTaskScheduler.class, TaskScheduler.class); taskScheduler = (TaskScheduler) ReflectionUtils.newInstance(schedulerClass, conf); // Set service-level authorization security policy if (conf.getBoolean( ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) { ServiceAuthorizationManager.refresh(conf, new MapReducePolicyProvider()); } int handlerCount = conf.getInt("mapred.job.tracker.handler.count", 10); this.interTrackerServer = RPC.getServer(this, addr.getHostName(), addr.getPort(), handlerCount, false, conf, secretManager); if (LOG.isDebugEnabled()) { Properties p = System.getProperties(); for (Iterator it = p.keySet().iterator(); it.hasNext();) { String key = (String) it.next(); String val = p.getProperty(key); LOG.debug("Property '" + key + "' is " + val); } } String infoAddr = NetUtils.getServerAddress(conf, "mapred.job.tracker.info.bindAddress", "mapred.job.tracker.info.port", "mapred.job.tracker.http.address"); InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(infoAddr); String infoBindAddress = infoSocAddr.getHostName(); int tmpInfoPort = infoSocAddr.getPort(); this.startTime = clock.getTime(); infoServer = new HttpServer("job", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf, aclsManager.getAdminsAcl()); infoServer.setAttribute("job.tracker", this); // initialize history parameters. final JobTracker jtFinal = this; getMROwner().doAs(new PrivilegedExceptionAction<Boolean>() { @Override public Boolean run() throws Exception { JobHistory.init(jtFinal, conf,jtFinal.localMachine, jtFinal.startTime); return true; } }); infoServer.addServlet("reducegraph", "/taskgraph", TaskGraphServlet.class); infoServer.start(); this.trackerIdentifier = identifier; createInstrumentation(); // The rpc/web-server ports can be ephemeral ports... // ... 
ensure we have the correct info this.port = interTrackerServer.getListenerAddress().getPort(); this.conf.set("mapred.job.tracker", (this.localMachine + ":" + this.port)); this.localFs = FileSystem.getLocal(conf); LOG.info("JobTracker up at: " + this.port); this.infoPort = this.infoServer.getPort(); this.conf.set("mapred.job.tracker.http.address", infoBindAddress + ":" + this.infoPort); LOG.info("JobTracker webserver: " + this.infoServer.getPort()); // start the recovery manager recoveryManager = new RecoveryManager(); while (!Thread.currentThread().isInterrupted()) { try { // if we haven't contacted the namenode go ahead and do it if (fs == null) { fs = getMROwner().doAs(new PrivilegedExceptionAction<FileSystem>() { public FileSystem run() throws IOException { return FileSystem.get(conf); }}); } // clean up the system dir, which will only work if hdfs is out of // safe mode if(systemDir == null) { systemDir = new Path(getSystemDir()); } try { FileStatus systemDirStatus = fs.getFileStatus(systemDir); if (!systemDirStatus.getOwner().equals( getMROwner().getShortUserName())) { throw new AccessControlException("The systemdir " + systemDir + " is not owned by " + getMROwner().getShortUserName()); } if (!systemDirStatus.getPermission().equals(SYSTEM_DIR_PERMISSION)) { LOG.warn("Incorrect permissions on " + systemDir + ". Setting it to " + SYSTEM_DIR_PERMISSION); fs.setPermission(systemDir,new FsPermission(SYSTEM_DIR_PERMISSION)); } } catch (FileNotFoundException fnf) {} //ignore // Make sure that the backup data is preserved FileStatus[] systemDirData = fs.listStatus(this.systemDir); // Check if the history is enabled .. 
as we cant have persistence with // history disabled if (conf.getBoolean("mapred.jobtracker.restart.recover", false) && systemDirData != null) { for (FileStatus status : systemDirData) { try { recoveryManager.checkAndAddJob(status); } catch (Throwable t) { LOG.warn("Failed to add the job " + status.getPath().getName(), t); } } // Check if there are jobs to be recovered hasRestarted = recoveryManager.shouldRecover(); if (hasRestarted) { break; // if there is something to recover else clean the sys dir } } LOG.info("Cleaning up the system directory"); fs.delete(systemDir, true); if (FileSystem.mkdirs(fs, systemDir, new FsPermission(SYSTEM_DIR_PERMISSION))) { break; } LOG.error("Mkdirs failed to create " + systemDir); } catch (AccessControlException ace) { LOG.warn("Failed to operate on mapred.system.dir (" + systemDir + ") because of permissions."); LOG.warn("Manually delete the mapred.system.dir (" + systemDir + ") and then start the JobTracker."); LOG.warn("Bailing out ... ", ace); throw ace; } catch (IOException ie) { LOG.info("problem cleaning system directory: " + systemDir, ie); } Thread.sleep(FS_ACCESS_RETRY_PERIOD); } if (Thread.currentThread().isInterrupted()) { throw new InterruptedException(); } // Same with 'localDir' except it's always on the local disk. 
if (!hasRestarted) { jobConf.deleteLocalFiles(SUBDIR); } // Initialize history DONE folder FileSystem historyFS = getMROwner().doAs( new PrivilegedExceptionAction<FileSystem>() { public FileSystem run() throws IOException { JobHistory.initDone(conf, fs); final String historyLogDir = JobHistory.getCompletedJobHistoryLocation().toString(); infoServer.setAttribute("historyLogDir", historyLogDir); infoServer.setAttribute ("serialNumberDirectoryDigits", Integer.valueOf(JobHistory.serialNumberDirectoryDigits())); infoServer.setAttribute ("serialNumberTotalDigits", Integer.valueOf(JobHistory.serialNumberTotalDigits())); return new Path(historyLogDir).getFileSystem(conf); } }); infoServer.setAttribute("fileSys", historyFS); infoServer.setAttribute("jobConf", conf); infoServer.setAttribute("aclManager", aclsManager); if (JobHistoryServer.isEmbedded(conf)) { LOG.info("History server being initialized in embedded mode"); jobHistoryServer = new JobHistoryServer(conf, aclsManager, infoServer); jobHistoryServer.start(); LOG.info("Job History Server web address: " + JobHistoryServer.getAddress(conf)); } this.dnsToSwitchMapping = ReflectionUtils.newInstance( conf.getClass("topology.node.switch.mapping.impl", ScriptBasedMapping.class, DNSToSwitchMapping.class), conf); this.numTaskCacheLevels = conf.getInt("mapred.task.cache.levels", NetworkTopology.DEFAULT_HOST_LEVEL); //initializes the job status store completedJobStatusStore = new CompletedJobStatusStore(conf, aclsManager); } private static SimpleDateFormat getDateFormat() { return new SimpleDateFormat("yyyyMMddHHmm"); } private static String generateNewIdentifier() { return getDateFormat().format(new Date()); } static boolean validateIdentifier(String id) { try { // the jobtracker id should be 'date' parseable getDateFormat().parse(id); return true; } catch (ParseException pe) {} return false; } static boolean validateJobNumber(String id) { try { // the job number should be integer parseable Integer.parseInt(id); return true; } 
catch (IllegalArgumentException pe) {} return false; } /** * Whether the JT has restarted */ public boolean hasRestarted() { return hasRestarted; } /** * Whether the JT has recovered upon restart */ public boolean hasRecovered() { return hasRecovered; } /** * How long the jobtracker took to recover from restart. */ public long getRecoveryDuration() { return hasRestarted() ? recoveryDuration : 0; } /** * Get JobTracker's FileSystem. This is the filesystem for mapred.system.dir. */ FileSystem getFileSystem() { return fs; } /** * Get JobTracker's LocalFileSystem handle. This is used by jobs for * localizing job files to the local disk. */ LocalFileSystem getLocalFileSystem() throws IOException { return localFs; } static Class<? extends JobTrackerInstrumentation> getInstrumentationClass(Configuration conf) { return conf.getClass("mapred.jobtracker.instrumentation", null, JobTrackerInstrumentation.class); } static void setInstrumentationClass(Configuration conf, Class<? extends JobTrackerInstrumentation> t) { conf.setClass("mapred.jobtracker.instrumentation", t, JobTrackerInstrumentation.class); } JobTrackerInstrumentation getInstrumentation() { return myInstrumentation; } public static InetSocketAddress getAddress(Configuration conf) { String jobTrackerStr = conf.get("mapred.job.tracker", "localhost:8012"); return NetUtils.createSocketAddr(jobTrackerStr); } /** * Run forever */ public void offerService() throws InterruptedException, IOException { // Prepare for recovery. This is done irrespective of the status of restart // flag. while (true) { try { recoveryManager.updateRestartCount(); break; } catch (IOException ioe) { LOG.warn("Failed to initialize recovery manager. ", ioe); // wait for some time Thread.sleep(FS_ACCESS_RETRY_PERIOD); LOG.warn("Retrying..."); } } taskScheduler.start(); // Start the recovery after starting the scheduler try { recoveryManager.recover(); } catch (Throwable t) { LOG.warn("Recovery manager crashed! 
Ignoring.", t); } // refresh the node list as the recovery manager might have added // disallowed trackers refreshHosts(); this.expireTrackersThread = new Thread(this.expireTrackers, "expireTrackers"); this.expireTrackersThread.start(); this.retireJobsThread = new Thread(this.retireJobs, "retireJobs"); this.retireJobsThread.start(); expireLaunchingTaskThread.start(); if (completedJobStatusStore.isActive()) { completedJobsStoreThread = new Thread(completedJobStatusStore, "completedjobsStore-housekeeper"); completedJobsStoreThread.start(); } // start the inter-tracker server once the jt is ready this.interTrackerServer.start(); synchronized (this) { state = State.RUNNING; } LOG.info("Starting RUNNING"); this.interTrackerServer.join(); LOG.info("Stopped interTrackerServer"); } void close() throws IOException { if (this.infoServer != null) { LOG.info("Stopping infoServer"); try { this.infoServer.stop(); } catch (Exception ex) { LOG.warn("Exception shutting down JobTracker", ex); } } if (this.interTrackerServer != null) { LOG.info("Stopping interTrackerServer"); this.interTrackerServer.stop(); } if (this.expireTrackersThread != null && this.expireTrackersThread.isAlive()) { LOG.info("Stopping expireTrackers"); this.expireTrackersThread.interrupt(); try { this.expireTrackersThread.join(); } catch (InterruptedException ex) { ex.printStackTrace(); } } if (this.retireJobsThread != null && this.retireJobsThread.isAlive()) { LOG.info("Stopping retirer"); this.retireJobsThread.interrupt(); try { this.retireJobsThread.join(); } catch (InterruptedException ex) { ex.printStackTrace(); } } if (taskScheduler != null) { taskScheduler.terminate(); } if (this.expireLaunchingTaskThread != null && this.expireLaunchingTaskThread.isAlive()) { LOG.info("Stopping expireLaunchingTasks"); this.expireLaunchingTaskThread.interrupt(); try { this.expireLaunchingTaskThread.join(); } catch (InterruptedException ex) { ex.printStackTrace(); } } if (this.completedJobsStoreThread != null && 
this.completedJobsStoreThread.isAlive()) { LOG.info("Stopping completedJobsStore thread"); this.completedJobsStoreThread.interrupt(); try { this.completedJobsStoreThread.join(); } catch (InterruptedException ex) { ex.printStackTrace(); } } if (jobHistoryServer != null) { LOG.info("Stopping job history server"); try { jobHistoryServer.shutdown(); } catch (Exception ex) { LOG.warn("Exception shutting down Job History server", ex); } } DelegationTokenRenewal.close(); LOG.info("stopped all jobtracker services"); return; } /////////////////////////////////////////////////////// // Maintain lookup tables; called by JobInProgress // and TaskInProgress /////////////////////////////////////////////////////// void createTaskEntry(TaskAttemptID taskid, String taskTracker, TaskInProgress tip) { LOG.info("Adding task (" + tip.getAttemptType(taskid) + ") " + "'" + taskid + "' to tip " + tip.getTIPId() + ", for tracker '" + taskTracker + "'"); // taskid --> tracker taskidToTrackerMap.put(taskid, taskTracker); // tracker --> taskid Set<TaskAttemptID> taskset = trackerToTaskMap.get(taskTracker); if (taskset == null) { taskset = new TreeSet<TaskAttemptID>(); trackerToTaskMap.put(taskTracker, taskset); } taskset.add(taskid); // taskid --> TIP taskidToTIPMap.put(taskid, tip); } void removeTaskEntry(TaskAttemptID taskid) { // taskid --> tracker String tracker = taskidToTrackerMap.remove(taskid); // tracker --> taskid if (tracker != null) { Set<TaskAttemptID> trackerSet = trackerToTaskMap.get(tracker); if (trackerSet != null) { trackerSet.remove(taskid); } } // taskid --> TIP if (taskidToTIPMap.remove(taskid) != null) { LOG.info("Removing task '" + taskid + "'"); } } /** * Mark a 'task' for removal later. * This function assumes that the JobTracker is locked on entry. 
* * @param taskTracker the tasktracker at which the 'task' was running * @param taskid completed (success/failure/killed) task */ void markCompletedTaskAttempt(String taskTracker, TaskAttemptID taskid) { // tracker --> taskid Set<TaskAttemptID> taskset = trackerToMarkedTasksMap.get(taskTracker); if (taskset == null) { taskset = new TreeSet<TaskAttemptID>(); trackerToMarkedTasksMap.put(taskTracker, taskset); } taskset.add(taskid); if (LOG.isDebugEnabled()) { LOG.debug("Marked '" + taskid + "' from '" + taskTracker + "'"); } } /** * Mark all 'non-running' jobs of the job for pruning. * This function assumes that the JobTracker is locked on entry. * * @param job the completed job */ void markCompletedJob(JobInProgress job) { for (TaskInProgress tip : job.getTasks(TaskType.JOB_SETUP)) { for (TaskStatus taskStatus : tip.getTaskStatuses()) { if (taskStatus.getRunState() != TaskStatus.State.RUNNING && taskStatus.getRunState() != TaskStatus.State.COMMIT_PENDING && taskStatus.getRunState() != TaskStatus.State.UNASSIGNED) { markCompletedTaskAttempt(taskStatus.getTaskTracker(), taskStatus.getTaskID()); } } } for (TaskInProgress tip : job.getTasks(TaskType.MAP)) { for (TaskStatus taskStatus : tip.getTaskStatuses()) { if (taskStatus.getRunState() != TaskStatus.State.RUNNING && taskStatus.getRunState() != TaskStatus.State.COMMIT_PENDING && taskStatus.getRunState() != TaskStatus.State.FAILED_UNCLEAN && taskStatus.getRunState() != TaskStatus.State.KILLED_UNCLEAN && taskStatus.getRunState() != TaskStatus.State.UNASSIGNED) { markCompletedTaskAttempt(taskStatus.getTaskTracker(), taskStatus.getTaskID()); } } } for (TaskInProgress tip : job.getTasks(TaskType.REDUCE)) { for (TaskStatus taskStatus : tip.getTaskStatuses()) { if (taskStatus.getRunState() != TaskStatus.State.RUNNING && taskStatus.getRunState() != TaskStatus.State.COMMIT_PENDING && taskStatus.getRunState() != TaskStatus.State.FAILED_UNCLEAN && taskStatus.getRunState() != TaskStatus.State.KILLED_UNCLEAN && 
taskStatus.getRunState() != TaskStatus.State.UNASSIGNED) { markCompletedTaskAttempt(taskStatus.getTaskTracker(), taskStatus.getTaskID()); } } } } /** * Remove all 'marked' tasks running on a given {@link TaskTracker} * from the {@link JobTracker}'s data-structures. * This function assumes that the JobTracker is locked on entry. * * @param taskTracker tasktracker whose 'non-running' tasks are to be purged */ private void removeMarkedTasks(String taskTracker) { // Purge all the 'marked' tasks which were running at taskTracker Set<TaskAttemptID> markedTaskSet = trackerToMarkedTasksMap.get(taskTracker); if (markedTaskSet != null) { for (TaskAttemptID taskid : markedTaskSet) { removeTaskEntry(taskid); if (LOG.isDebugEnabled()) { LOG.debug("Removed marked completed task '" + taskid + "' from '" + taskTracker + "'"); } } // Clear trackerToMarkedTasksMap.remove(taskTracker); } } /** * Call {@link #removeTaskEntry(String)} for each of the * job's tasks. * When the JobTracker is retiring the long-completed * job, either because it has outlived {@link #RETIRE_JOB_INTERVAL} * or the limit of {@link #MAX_COMPLETE_USER_JOBS_IN_MEMORY} jobs * has been reached, we can afford to nuke all it's tasks; a little * unsafe, but practically feasible. * * @param job the job about to be 'retired' */ synchronized void removeJobTasks(JobInProgress job) { // iterate over all the task types for (TaskType type : TaskType.values()) { // iterate over all the tips of the type under consideration for (TaskInProgress tip : job.getTasks(type)) { // iterate over all the task-ids in the tip under consideration for (TaskAttemptID id : tip.getAllTaskAttemptIDs()) { // remove the task-id entry from the jobtracker removeTaskEntry(id); } } } } /** * Safe clean-up all data structures at the end of the * job (success/failure/killed). * Here we also ensure that for a given user we maintain * information for only MAX_COMPLETE_USER_JOBS_IN_MEMORY jobs * on the JobTracker. * * @param job completed job. 
*/ synchronized void finalizeJob(JobInProgress job) { // Mark the 'non-running' tasks for pruning markCompletedJob(job); JobEndNotifier.registerNotification(job.getJobConf(), job.getStatus()); // start the merge of log files JobID id = job.getStatus().getJobID(); if (job.hasRestarted()) { try { JobHistory.JobInfo.finalizeRecovery(id, job.getJobConf()); } catch (IOException ioe) { LOG.info("Failed to finalize the log file recovery for job " + id, ioe); } } // mark the job as completed try { JobHistory.JobInfo.markCompleted(id); } catch (IOException ioe) { LOG.info("Failed to mark job " + id + " as completed!", ioe); } final JobTrackerInstrumentation metrics = getInstrumentation(); metrics.finalizeJob(conf, id); long now = clock.getTime(); // mark the job for cleanup at all the trackers addJobForCleanup(id); // add the (single-job) blacklisted trackers to potentially faulty list // for possible heuristic graylisting across all jobs if (job.getStatus().getRunState() == JobStatus.SUCCEEDED) { if (job.getNoOfBlackListedTrackers() > 0) { for (String hostName : job.getBlackListedTrackers()) { faultyTrackers.incrementFaults(hostName); } } } String jobUser = job.getProfile().getUser(); // add to the user to jobs mapping synchronized (userToJobsMap) { ArrayList<JobInProgress> userJobs = userToJobsMap.get(jobUser); if (userJobs == null) { userJobs = new ArrayList<JobInProgress>(); userToJobsMap.put(jobUser, userJobs); } userJobs.add(job); } } /////////////////////////////////////////////////////// // Accessors for objects that want info on jobs, tasks, // trackers, etc. /////////////////////////////////////////////////////// public int getTotalSubmissions() { return totalSubmissions; } public String getJobTrackerMachine() { return localMachine; } /** * Get the unique identifier (ie. timestamp) of this job tracker start. 
* @return a string with a unique identifier
   */
  public String getTrackerIdentifier() {
    return trackerIdentifier;
  }

  /** @return the port of the inter-tracker RPC server. */
  public int getTrackerPort() {
    return port;
  }

  /** @return the port of the HTTP info server. */
  public int getInfoPort() {
    return infoPort;
  }

  /** @return the start time recorded by the constructor. */
  public long getStartTime() {
    return startTime;
  }

  /**
   * Snapshot of the jobs whose run state is RUNNING.
   *
   * The jobs table is a synchronized map, so its monitor is held for the
   * duration of the iteration; concurrent submissions or retirements can
   * therefore not invalidate the iterator.
   *
   * @return a new vector holding the running jobs
   */
  public Vector<JobInProgress> runningJobs() {
    Vector<JobInProgress> v = new Vector<JobInProgress>();
    synchronized (jobs) {
      for (JobInProgress jip : jobs.values()) {
        JobStatus status = jip.getStatus();
        if (status.getRunState() == JobStatus.RUNNING) {
          v.add(jip);
        }
      }
    }
    return v;
  }

  /**
   * Version that is called from a timer thread, and therefore needs to be
   * careful to synchronize. It takes the JobTracker lock before the jobs
   * lock.
   */
  public synchronized List<JobInProgress> getRunningJobs() {
    synchronized (jobs) {
      return runningJobs();
    }
  }

  /**
   * Snapshot of the jobs whose run state is FAILED or KILLED, taken while
   * holding the monitor of the jobs table.
   *
   * @return a new vector holding the failed and killed jobs
   */
  public Vector<JobInProgress> failedJobs() {
    Vector<JobInProgress> v = new Vector<JobInProgress>();
    synchronized (jobs) {
      for (JobInProgress jip : jobs.values()) {
        JobStatus status = jip.getStatus();
        if ((status.getRunState() == JobStatus.FAILED)
            || (status.getRunState() == JobStatus.KILLED)) {
          v.add(jip);
        }
      }
    }
    return v;
  }

  /**
   * Snapshot of the jobs whose run state is SUCCEEDED, taken while holding
   * the monitor of the jobs table.
   *
   * @return a new vector holding the succeeded jobs
   */
  public Vector<JobInProgress> completedJobs() {
    Vector<JobInProgress> v = new Vector<JobInProgress>();
    synchronized (jobs) {
      for (JobInProgress jip : jobs.values()) {
        JobStatus status = jip.getStatus();
        if (status.getRunState() == JobStatus.SUCCEEDED) {
          v.add(jip);
        }
      }
    }
    return v;
  }

  /**
   * Get all the task trackers in the cluster
   *
   * @return {@link Collection} of {@link TaskTrackerStatus}
   */
  // lock to taskTrackers should hold JT lock first.
public synchronized Collection<TaskTrackerStatus> taskTrackers() { Collection<TaskTrackerStatus> ttStatuses; synchronized (taskTrackers) { ttStatuses = new ArrayList<TaskTrackerStatus>(taskTrackers.values().size()); for (TaskTracker tt : taskTrackers.values()) { ttStatuses.add(tt.getStatus()); } } return ttStatuses; } /** * Get the active task tracker statuses in the cluster * * @return {@link Collection} of active {@link TaskTrackerStatus} */ // This method is synchronized to make sure that the locking order // "taskTrackers lock followed by faultyTrackers.potentiallyFaultyTrackers // lock" is under JobTracker lock to avoid deadlocks. synchronized public Collection<TaskTrackerStatus> activeTaskTrackers() { Collection<TaskTrackerStatus> activeTrackers = new ArrayList<TaskTrackerStatus>(); synchronized (taskTrackers) { for ( TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); if (!faultyTrackers.isBlacklisted(status.getHost())) { activeTrackers.add(status); } } } return activeTrackers; } /** * Get the active, blacklisted, and graylisted task tracker names in the * cluster. The first element in the returned list contains the list of * active tracker names; the second element in the returned list contains * the list of blacklisted tracker names; and the third contains the list * of graylisted tracker names. Note that the blacklist is disjoint * from the active list, but the graylist is not: initially, graylisted * trackers are still active and therefore will appear in both lists. * (Graylisted trackers can later be blacklisted, in which case they'll * be removed from the active list and added to the blacklist, but they * remain on the graylist in this case. Blacklisting comes about via the * health-check script, while graylisting is heuristically based on the * number of per-job blacklistings in a specified time interval.) 
*/ // This method is synchronized to make sure that the locking order // "taskTrackers lock followed by faultyTrackers.potentiallyFaultyTrackers // lock" is under JobTracker lock to avoid deadlocks. synchronized public List<List<String>> taskTrackerNames() { List<String> activeTrackers = new ArrayList<String>(); List<String> blacklistedTrackers = new ArrayList<String>(); List<String> graylistedTrackers = new ArrayList<String>(); synchronized (taskTrackers) { for (TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); String hostName = status.getHost(); String trackerName = status.getTrackerName(); if (!faultyTrackers.isBlacklisted(hostName)) { activeTrackers.add(trackerName); } else { blacklistedTrackers.add(trackerName); } if (faultyTrackers.isGraylisted(hostName)) { graylistedTrackers.add(trackerName); } } } List<List<String>> result = new ArrayList<List<String>>(3); result.add(activeTrackers); result.add(blacklistedTrackers); result.add(graylistedTrackers); return result; } /** * Get the statuses of the blacklisted task trackers in the cluster. * * @return {@link Collection} of blacklisted {@link TaskTrackerStatus} */ // used by the web UI (machines.jsp) public Collection<TaskTrackerStatus> blacklistedTaskTrackers() { return blackOrGraylistedTaskTrackers(false); } /** * Get the statuses of the graylisted task trackers in the cluster. * * @return {@link Collection} of graylisted {@link TaskTrackerStatus} */ public Collection<TaskTrackerStatus> graylistedTaskTrackers() { return blackOrGraylistedTaskTrackers(true); } // This method is synchronized to make sure that the locking order // "taskTrackers lock followed by faultyTrackers.potentiallyFaultyTrackers // lock" is under JobTracker lock to avoid deadlocks. 
synchronized private Collection<TaskTrackerStatus> blackOrGraylistedTaskTrackers(boolean gray) { Collection<TaskTrackerStatus> listedTrackers = new ArrayList<TaskTrackerStatus>(); synchronized (taskTrackers) { for (TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); boolean listed = gray? faultyTrackers.isGraylisted(status.getHost()) : faultyTrackers.isBlacklisted(status.getHost()); if (listed) { listedTrackers.add(status); } } } return listedTrackers; } synchronized int getFaultCount(String hostName) { return faultyTrackers.getFaultCount(hostName); } /** * Get the number of task trackers that are blacklisted (via health-check * script) across all jobs. * * @return */ int getBlacklistedTrackerCount() { return faultyTrackers.numBlacklistedTrackers; } /** * Get the number of task trackers that are graylisted (via heuristics on * single-job blacklistings) across all jobs. * * @return */ int getGraylistedTrackerCount() { return faultyTrackers.numGraylistedTrackers; } /** * Whether the tracker is blacklisted or not * * @param trackerID * * @return true if blacklisted, false otherwise */ synchronized public boolean isBlacklisted(String trackerID) { TaskTrackerStatus status = getTaskTrackerStatus(trackerID); if (status != null) { return faultyTrackers.isBlacklisted(status.getHost()); } return false; } /** * Whether the tracker is graylisted or not * * @param trackerID * * @return true if graylisted, false otherwise */ synchronized public boolean isGraylisted(String trackerID) { TaskTrackerStatus status = getTaskTrackerStatus(trackerID); if (status != null) { return faultyTrackers.isGraylisted(status.getHost()); } return false; } // lock to taskTrackers should hold JT lock first. synchronized public TaskTrackerStatus getTaskTrackerStatus(String trackerID) { TaskTracker taskTracker; synchronized (taskTrackers) { taskTracker = taskTrackers.get(trackerID); } return (taskTracker == null) ? 
null : taskTracker.getStatus(); } // lock to taskTrackers should hold JT lock first. synchronized public TaskTracker getTaskTracker(String trackerID) { synchronized (taskTrackers) { return taskTrackers.get(trackerID); } } JobTrackerStatistics getStatistics() { return statistics; } /** * Adds a new node to the jobtracker. It involves adding it to the expiry * thread and adding it for resolution * * Assumes JobTracker, taskTrackers and trackerExpiryQueue is locked on entry * * @param status Task Tracker's status */ private void addNewTracker(TaskTracker taskTracker) throws UnknownHostException { TaskTrackerStatus status = taskTracker.getStatus(); trackerExpiryQueue.add(status); // Register the tracker if its not registered String hostname = status.getHost(); if (getNode(status.getTrackerName()) == null) { // Making the network location resolution inline .. resolveAndAddToTopology(hostname); } // add it to the set of tracker per host Set<TaskTracker> trackers = hostnameToTaskTracker.get(hostname); if (trackers == null) { trackers = Collections.synchronizedSet(new HashSet<TaskTracker>()); hostnameToTaskTracker.put(hostname, trackers); } statistics.taskTrackerAdded(status.getTrackerName()); getInstrumentation().addTrackers(1); LOG.info("Adding tracker " + status.getTrackerName() + " to host " + hostname); trackers.add(taskTracker); } public Node resolveAndAddToTopology(String name) throws UnknownHostException { List <String> tmpList = new ArrayList<String>(1); tmpList.add(name); List <String> rNameList = dnsToSwitchMapping.resolve(tmpList); String rName = rNameList.get(0); String networkLoc = NodeBase.normalize(rName); return addHostToNodeMapping(name, networkLoc); } private Node addHostToNodeMapping(String host, String networkLoc) { Node node = null; synchronized (nodesAtMaxLevel) { if ((node = clusterMap.getNode(networkLoc+"/"+host)) == null) { node = new NodeBase(host, networkLoc); clusterMap.add(node); if (node.getLevel() < getNumTaskCacheLevels()) { LOG.fatal("Got 
a host whose level is: " + node.getLevel() + "." + " Should get at least a level of value: " + getNumTaskCacheLevels()); try { stopTracker(); } catch (IOException ie) { LOG.warn("Exception encountered during shutdown: " + StringUtils.stringifyException(ie)); System.exit(-1); } } hostnameToNodeMap.put(host, node); // Make an entry for the node at the max level in the cache nodesAtMaxLevel.add(getParentNode(node, getNumTaskCacheLevels() - 1)); } } return node; } /** * Returns a collection of nodes at the max level */ public Collection<Node> getNodesAtMaxLevel() { return nodesAtMaxLevel; } public static Node getParentNode(Node node, int level) { for (int i = 0; i < level; ++i) { node = node.getParent(); } return node; } /** * Return the Node in the network topology that corresponds to the hostname */ public Node getNode(String name) { return hostnameToNodeMap.get(name); } public int getNumTaskCacheLevels() { return numTaskCacheLevels; } public int getNumResolvedTaskTrackers() { return numResolved; } public int getNumberOfUniqueHosts() { return uniqueHostsMap.size(); } public void addJobInProgressListener(JobInProgressListener listener) { jobInProgressListeners.add(listener); } public void removeJobInProgressListener(JobInProgressListener listener) { jobInProgressListeners.remove(listener); } // Update the listeners about the job // Assuming JobTracker is locked on entry. private void updateJobInProgressListeners(JobChangeEvent event) { for (JobInProgressListener listener : jobInProgressListeners) { listener.jobUpdated(event); } } /** * Return the {@link QueueManager} associated with the JobTracker. 
*/ public QueueManager getQueueManager() { return queueManager; } //////////////////////////////////////////////////// // InterTrackerProtocol //////////////////////////////////////////////////// public String getBuildVersion() throws IOException{ return VersionInfo.getBuildVersion(); } /** * The periodic heartbeat mechanism between the {@link TaskTracker} and * the {@link JobTracker}. * * The {@link JobTracker} processes the status information sent by the * {@link TaskTracker} and responds with instructions to start/stop * tasks or jobs, and also 'reset' instructions during contingencies. */ public synchronized HeartbeatResponse heartbeat(TaskTrackerStatus status, boolean restarted, boolean initialContact, boolean acceptNewTasks, short responseId) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("Got heartbeat from: " + status.getTrackerName() + " (restarted: " + restarted + " initialContact: " + initialContact + " acceptNewTasks: " + acceptNewTasks + ")" + " with responseId: " + responseId); } // Make sure heartbeat is from a tasktracker allowed by the jobtracker. if (!acceptTaskTracker(status)) { throw new DisallowedTaskTrackerException(status); } // First check if the last heartbeat response got through String trackerName = status.getTrackerName(); long now = clock.getTime(); if (restarted) { faultyTrackers.markTrackerHealthy(status.getHost()); } else { faultyTrackers.checkTrackerFaultTimeout(status.getHost(), now); } HeartbeatResponse prevHeartbeatResponse = trackerToHeartbeatResponseMap.get(trackerName); boolean addRestartInfo = false; if (initialContact != true) { // If this isn't the 'initial contact' from the tasktracker, // there is something seriously wrong if the JobTracker has // no record of the 'previous heartbeat'; if so, ask the // tasktracker to re-initialize itself. 
if (prevHeartbeatResponse == null) { // This is the first heartbeat from the old tracker to the newly // started JobTracker if (hasRestarted()) { addRestartInfo = true; // inform the recovery manager about this tracker joining back recoveryManager.unMarkTracker(trackerName); } else { // Jobtracker might have restarted but no recovery is needed // otherwise this code should not be reached LOG.warn("Serious problem, cannot find record of 'previous' " + "heartbeat for '" + trackerName + "'; reinitializing the tasktracker"); return new HeartbeatResponse(responseId, new TaskTrackerAction[] {new ReinitTrackerAction()}); } } else { // It is completely safe to not process a 'duplicate' heartbeat from a // {@link TaskTracker} since it resends the heartbeat when rpcs are // lost see {@link TaskTracker.transmitHeartbeat()}; // acknowledge it by re-sending the previous response to let the // {@link TaskTracker} go forward. if (prevHeartbeatResponse.getResponseId() != responseId) { LOG.info("Ignoring 'duplicate' heartbeat from '" + trackerName + "'; resending the previous 'lost' response"); return prevHeartbeatResponse; } } } // Process this heartbeat short newResponseId = (short)(responseId + 1); status.setLastSeen(now); if (!processHeartbeat(status, initialContact, now)) { if (prevHeartbeatResponse != null) { trackerToHeartbeatResponseMap.remove(trackerName); } return new HeartbeatResponse(newResponseId, new TaskTrackerAction[] {new ReinitTrackerAction()}); } // Initialize the response to be sent for the heartbeat HeartbeatResponse response = new HeartbeatResponse(newResponseId, null); List<TaskTrackerAction> actions = new ArrayList<TaskTrackerAction>(); boolean isBlacklisted = faultyTrackers.isBlacklisted(status.getHost()); // Check for new tasks to be executed on the tasktracker if (recoveryManager.shouldSchedule() && acceptNewTasks && !isBlacklisted) { TaskTrackerStatus taskTrackerStatus = getTaskTrackerStatus(trackerName); if (taskTrackerStatus == null) { 
LOG.warn("Unknown task tracker polling; ignoring: " + trackerName); } else { List<Task> tasks = getSetupAndCleanupTasks(taskTrackerStatus); if (tasks == null ) { for (TaskStatus t : taskTrackerStatus.getTaskReports()) { System.out.println(t.getStateString()); } tasks = taskScheduler.assignTasks(taskTrackers.get(trackerName)); } if (tasks != null) { for (Task task : tasks) { expireLaunchingTasks.addNewTask(task.getTaskID()); if(LOG.isDebugEnabled()) { LOG.debug(trackerName + " -> LaunchTask: " + task.getTaskID()); } /* NEW BLOCK */ TaskReport[] as = this.getMapTaskReports(task.getJobID()); for (TaskReport a : as) { // if (a.getFinishTime()!= 0) { // LOG.info("YYYY " + a.getTaskID() + " " + (a.getFinishTime() - a.getStartTime()) + " [ms]."); // } if(task.getTaskID().getTaskID() == a.getTaskID()) { a.setRunOnGPU(task.runOnGPU()); a.setGPUDeviceId(task.GPUDeviceId()); LOG.info("AAAA " + a.getRunOnGPU() + " " + task.getTaskID().getTaskID().getId()); LOG.info("CCCC " + as[task.getTaskID().getTaskID().getId()].getRunOnGPU() + " " + a.getTaskId()); LOG.info("BBBB " + getMapTaskReports(task.getJobID())[task.getTaskID().getTaskID().getId()].getRunOnGPU() + " " + task.getTaskID().getTaskID().getId()); } } //if (task.isMapTask() && (acceptNewTasks == Tasks.GPUTasks)) { // task.setRunOnGPU(true); //} /* NEW BLOCK END*/ actions.add(new LaunchTaskAction(task)); } } } } // Check for tasks to be killed List<TaskTrackerAction> killTasksList = getTasksToKill(trackerName); if (killTasksList != null) { actions.addAll(killTasksList); } // Check for jobs to be killed/cleanedup List<TaskTrackerAction> killJobsList = getJobsForCleanup(trackerName); if (killJobsList != null) { actions.addAll(killJobsList); } // Check for tasks whose outputs can be saved List<TaskTrackerAction> commitTasksList = getTasksToSave(status); if (commitTasksList != null) { actions.addAll(commitTasksList); } // calculate next heartbeat interval and put in heartbeat response int nextInterval = 
getNextHeartbeatInterval(); response.setHeartbeatInterval(nextInterval); response.setActions( actions.toArray(new TaskTrackerAction[actions.size()])); // check if the restart info is req if (addRestartInfo) { response.setRecoveredJobs(recoveryManager.getJobsToRecover()); } // Update the trackerToHeartbeatResponseMap trackerToHeartbeatResponseMap.put(trackerName, response); // Done processing the hearbeat, now remove 'marked' tasks removeMarkedTasks(trackerName); return response; } /** * Calculates next heartbeat interval using cluster size. * Heartbeat interval is incremented by 1 second for every 100 nodes by default. * @return next heartbeat interval. */ public int getNextHeartbeatInterval() { // get the no of task trackers int clusterSize = getClusterStatus().getTaskTrackers(); int heartbeatInterval = Math.max( (int)(1000 * HEARTBEATS_SCALING_FACTOR * Math.ceil((double)clusterSize / NUM_HEARTBEATS_IN_SECOND)), HEARTBEAT_INTERVAL_MIN) ; return heartbeatInterval; } /** * Return if the specified tasktracker is in the hosts list, * if one was configured. If none was configured, then this * returns true. */ private boolean inHostsList(TaskTrackerStatus status) { Set<String> hostsList = hostsReader.getHosts(); return (hostsList.isEmpty() || hostsList.contains(status.getHost())); } /** * Return if the specified tasktracker is in the exclude list. */ private boolean inExcludedHostsList(TaskTrackerStatus status) { Set<String> excludeList = hostsReader.getExcludedHosts(); return excludeList.contains(status.getHost()); } /** * Returns true if the tasktracker is in the hosts list and * not in the exclude list. */ private boolean acceptTaskTracker(TaskTrackerStatus status) { return (inHostsList(status) && !inExcludedHostsList(status)); } /** * Update the last recorded status for the given task tracker. * It assumes that the taskTrackers are locked on entry. 
* @param trackerName The name of the tracker * @param status The new status for the task tracker * @return Was an old status found? */ private boolean updateTaskTrackerStatus(String trackerName, TaskTrackerStatus status) { TaskTracker tt = getTaskTracker(trackerName); TaskTrackerStatus oldStatus = (tt == null) ? null : tt.getStatus(); if (oldStatus != null) { totalMaps -= oldStatus.countMapTasks(); totalReduces -= oldStatus.countReduceTasks(); occupiedMapSlots -= oldStatus.countOccupiedMapSlots(); occupiedReduceSlots -= oldStatus.countOccupiedReduceSlots(); getInstrumentation().decRunningMaps(oldStatus.countMapTasks()); getInstrumentation().decRunningReduces(oldStatus.countReduceTasks()); getInstrumentation().decOccupiedMapSlots(oldStatus.countOccupiedMapSlots()); getInstrumentation().decOccupiedReduceSlots(oldStatus.countOccupiedReduceSlots()); if (!faultyTrackers.isBlacklisted(oldStatus.getHost())) { int mapSlots = oldStatus.getMaxMapSlots(); totalMapTaskCapacity -= mapSlots; int reduceSlots = oldStatus.getMaxReduceSlots(); totalReduceTaskCapacity -= reduceSlots; } if (status == null) { taskTrackers.remove(trackerName); Integer numTaskTrackersInHost = uniqueHostsMap.get(oldStatus.getHost()); if (numTaskTrackersInHost != null) { numTaskTrackersInHost --; if (numTaskTrackersInHost > 0) { uniqueHostsMap.put(oldStatus.getHost(), numTaskTrackersInHost); } else { uniqueHostsMap.remove(oldStatus.getHost()); } } } } if (status != null) { totalMaps += status.countMapTasks(); totalReduces += status.countReduceTasks(); occupiedMapSlots += status.countOccupiedMapSlots(); occupiedReduceSlots += status.countOccupiedReduceSlots(); getInstrumentation().addRunningMaps(status.countMapTasks()); getInstrumentation().addRunningReduces(status.countReduceTasks()); getInstrumentation().addOccupiedMapSlots(status.countOccupiedMapSlots()); getInstrumentation().addOccupiedReduceSlots(status.countOccupiedReduceSlots()); if (!faultyTrackers.isBlacklisted(status.getHost())) { int mapSlots = 
status.getMaxMapSlots(); totalMapTaskCapacity += mapSlots; int reduceSlots = status.getMaxReduceSlots(); totalReduceTaskCapacity += reduceSlots; } boolean alreadyPresent = false; TaskTracker taskTracker = taskTrackers.get(trackerName); if (taskTracker != null) { alreadyPresent = true; } else { taskTracker = new TaskTracker(trackerName); } taskTracker.setStatus(status); taskTrackers.put(trackerName, taskTracker); if (LOG.isDebugEnabled()) { int runningMaps = 0, runningReduces = 0; int commitPendingMaps = 0, commitPendingReduces = 0; int unassignedMaps = 0, unassignedReduces = 0; int miscMaps = 0, miscReduces = 0; List<TaskStatus> taskReports = status.getTaskReports(); for (Iterator<TaskStatus> it = taskReports.iterator(); it.hasNext();) { TaskStatus ts = (TaskStatus) it.next(); boolean isMap = ts.getIsMap(); TaskStatus.State state = ts.getRunState(); if (state == TaskStatus.State.RUNNING) { if (isMap) { ++runningMaps; } else { ++runningReduces; } } else if (state == TaskStatus.State.UNASSIGNED) { if (isMap) { ++unassignedMaps; } else { ++unassignedReduces; } } else if (state == TaskStatus.State.COMMIT_PENDING) { if (isMap) { ++commitPendingMaps; } else { ++commitPendingReduces; } } else { if (isMap) { ++miscMaps; } else { ++miscReduces; } } } LOG.debug(trackerName + ": Status -" + " running(m) = " + runningMaps + " unassigned(m) = " + unassignedMaps + " commit_pending(m) = " + commitPendingMaps + " misc(m) = " + miscMaps + " running(r) = " + runningReduces + " unassigned(r) = " + unassignedReduces + " commit_pending(r) = " + commitPendingReduces + " misc(r) = " + miscReduces); } if (!alreadyPresent) { Integer numTaskTrackersInHost = uniqueHostsMap.get(status.getHost()); if (numTaskTrackersInHost == null) { numTaskTrackersInHost = 0; } numTaskTrackersInHost ++; uniqueHostsMap.put(status.getHost(), numTaskTrackersInHost); } } getInstrumentation().setMapSlots(totalMapTaskCapacity); getInstrumentation().setReduceSlots(totalReduceTaskCapacity); return oldStatus != null; 
} // Increment the number of reserved slots in the cluster. // This method assumes the caller has JobTracker lock. void incrementReservations(TaskType type, int reservedSlots) { if (type.equals(TaskType.MAP)) { reservedMapSlots += reservedSlots; } else if (type.equals(TaskType.REDUCE)) { reservedReduceSlots += reservedSlots; } } // Decrement the number of reserved slots in the cluster. // This method assumes the caller has JobTracker lock. void decrementReservations(TaskType type, int reservedSlots) { if (type.equals(TaskType.MAP)) { reservedMapSlots -= reservedSlots; } else if (type.equals(TaskType.REDUCE)) { reservedReduceSlots -= reservedSlots; } } private void updateNodeHealthStatus(TaskTrackerStatus trackerStatus, long timeStamp) { TaskTrackerHealthStatus status = trackerStatus.getHealthStatus(); synchronized (faultyTrackers) { faultyTrackers.setNodeHealthStatus(trackerStatus.getHost(), status.isNodeHealthy(), status.getHealthReport(), timeStamp); } } /** * Process incoming heartbeat messages from the task trackers. 
*/ private synchronized boolean processHeartbeat( TaskTrackerStatus trackerStatus, boolean initialContact, long timeStamp) throws UnknownHostException { getInstrumentation().heartbeat(); String trackerName = trackerStatus.getTrackerName(); synchronized (taskTrackers) { synchronized (trackerExpiryQueue) { boolean seenBefore = updateTaskTrackerStatus(trackerName, trackerStatus); TaskTracker taskTracker = getTaskTracker(trackerName); if (initialContact) { // If it's first contact, then clear out // any state hanging around if (seenBefore) { lostTaskTracker(taskTracker); } } else { // If not first contact, there should be some record of the tracker if (!seenBefore) { LOG.warn("Status from unknown Tracker : " + trackerName); updateTaskTrackerStatus(trackerName, null); return false; } } if (initialContact) { // if this is lost tracker that came back now, and if it's blacklisted // increment the count of blacklisted trackers in the cluster if (isBlacklisted(trackerName)) { faultyTrackers.incrBlacklistedTrackers(1); } // This could now throw an UnknownHostException but only if the // TaskTracker status itself has an invalid name addNewTracker(taskTracker); } } } updateTaskStatuses(trackerStatus); updateNodeHealthStatus(trackerStatus, timeStamp); return true; } /** * A tracker wants to know if any of its Tasks have been * closed (because the job completed, whether successfully or not) */ private synchronized List<TaskTrackerAction> getTasksToKill( String taskTracker) { Set<TaskAttemptID> taskIds = trackerToTaskMap.get(taskTracker); List<TaskTrackerAction> killList = new ArrayList<TaskTrackerAction>(); if (taskIds != null) { for (TaskAttemptID killTaskId : taskIds) { TaskInProgress tip = taskidToTIPMap.get(killTaskId); if (tip == null) { continue; } if (tip.shouldClose(killTaskId)) { // // This is how the JobTracker ends a task at the TaskTracker. // It may be successfully completed, or may be killed in // mid-execution. 
// if (!tip.getJob().isComplete()) { killList.add(new KillTaskAction(killTaskId)); if (LOG.isDebugEnabled()) { LOG.debug(taskTracker + " -> KillTaskAction: " + killTaskId); } } } } } // add the stray attempts for uninited jobs synchronized (trackerToTasksToCleanup) { Set<TaskAttemptID> set = trackerToTasksToCleanup.remove(taskTracker); if (set != null) { for (TaskAttemptID id : set) { killList.add(new KillTaskAction(id)); } } } return killList; } /** * Add a job to cleanup for the tracker. */ private void addJobForCleanup(JobID id) { for (String taskTracker : taskTrackers.keySet()) { if (LOG.isDebugEnabled()) { LOG.debug("Marking job " + id + " for cleanup by tracker " + taskTracker); } synchronized (trackerToJobsToCleanup) { Set<JobID> jobsToKill = trackerToJobsToCleanup.get(taskTracker); if (jobsToKill == null) { jobsToKill = new HashSet<JobID>(); trackerToJobsToCleanup.put(taskTracker, jobsToKill); } jobsToKill.add(id); } } } /** * A tracker wants to know if any job needs cleanup because the job completed. 
*/ private List<TaskTrackerAction> getJobsForCleanup(String taskTracker) { Set<JobID> jobs = null; synchronized (trackerToJobsToCleanup) { jobs = trackerToJobsToCleanup.remove(taskTracker); } if (jobs != null) { // prepare the actions list List<TaskTrackerAction> killList = new ArrayList<TaskTrackerAction>(); for (JobID killJobId : jobs) { killList.add(new KillJobAction(killJobId)); if(LOG.isDebugEnabled()) { LOG.debug(taskTracker + " -> KillJobAction: " + killJobId); } } return killList; } return null; } /** * A tracker wants to know if any of its Tasks can be committed */ private synchronized List<TaskTrackerAction> getTasksToSave( TaskTrackerStatus tts) { List<TaskStatus> taskStatuses = tts.getTaskReports(); if (taskStatuses != null) { List<TaskTrackerAction> saveList = new ArrayList<TaskTrackerAction>(); for (TaskStatus taskStatus : taskStatuses) { if (taskStatus.getRunState() == TaskStatus.State.COMMIT_PENDING) { TaskAttemptID taskId = taskStatus.getTaskID(); TaskInProgress tip = taskidToTIPMap.get(taskId); if (tip == null) { continue; } if (tip.shouldCommit(taskId)) { saveList.add(new CommitTaskAction(taskId)); if (LOG.isDebugEnabled()) { LOG.debug(tts.getTrackerName() + " -> CommitTaskAction: " + taskId); } } } } return saveList; } return null; } // returns cleanup tasks first, then setup tasks. 
synchronized List<Task> getSetupAndCleanupTasks(
    TaskTrackerStatus taskTracker) throws IOException {
    int maxMapTasks = taskTracker.getMaxMapSlots();
    int maxReduceTasks = taskTracker.getMaxReduceSlots();
    int numMaps = taskTracker.countOccupiedMapSlots();
    int numReduces = taskTracker.countOccupiedReduceSlots();
    int numTaskTrackers = getClusterStatus().getTaskTrackers();
    int numUniqueHosts = getNumberOfUniqueHosts();

    synchronized (jobs) {
      Task t = null;
      // Map slots are tried first; reduce slots only if no map-slot task
      // was found.
      if (numMaps < maxMapTasks) {
        t = findSetupOrCleanupTask(taskTracker, numTaskTrackers,
                                   numUniqueHosts, true);
      }
      if (t == null && numReduces < maxReduceTasks) {
        t = findSetupOrCleanupTask(taskTracker, numTaskTrackers,
                                   numUniqueHosts, false);
      }
      if (t != null) {
        return Collections.singletonList(t);
      }
    }
    return null;
  }

  /**
   * Searches all jobs for one setup/cleanup task for the given slot type.
   * The search order is: job-cleanup tasks of every job, then task-cleanup
   * tasks of every job, then job-setup tasks of every job. The caller must
   * hold the lock on {@code jobs}.
   *
   * @param taskTracker status of the tracker asking for work
   * @param numTaskTrackers cluster size passed through to the job
   * @param numUniqueHosts unique host count passed through to the job
   * @param isMapSlot true to look for a task for a map slot, false for a
   *                  reduce slot
   * @return the first task found, or {@code null} if no job offers one
   */
  private Task findSetupOrCleanupTask(TaskTrackerStatus taskTracker,
                                      int numTaskTrackers,
                                      int numUniqueHosts,
                                      boolean isMapSlot) throws IOException {
    for (JobInProgress job : jobs.values()) {
      Task t = job.obtainJobCleanupTask(taskTracker, numTaskTrackers,
                                        numUniqueHosts, isMapSlot);
      if (t != null) {
        return t;
      }
    }
    for (JobInProgress job : jobs.values()) {
      Task t = job.obtainTaskCleanupTask(taskTracker, isMapSlot);
      if (t != null) {
        return t;
      }
    }
    for (JobInProgress job : jobs.values()) {
      Task t = job.obtainJobSetupTask(taskTracker, numTaskTrackers,
                                      numUniqueHosts, isMapSlot);
      if (t != null) {
        return t;
      }
    }
    return null;
  }

  /**
   * Grab the local fs name
   */
public synchronized String getFilesystemName() throws IOException { if (fs == null) { throw new IllegalStateException("FileSystem object not available yet"); } return fs.getUri().toString(); } /** * Returns a handle to the JobTracker's Configuration */ public JobConf getConf() { return conf; } public void reportTaskTrackerError(String taskTracker, String errorClass, String errorMessage) throws IOException { LOG.warn("Report from " + taskTracker + ": " + errorMessage); } /** * Remove the job_ from jobids to get the unique string. */ static String getJobUniqueString(String jobid) { return jobid.substring(4); } //////////////////////////////////////////////////// // JobSubmissionProtocol //////////////////////////////////////////////////// /** * Allocates a new JobId string. */ public synchronized JobID getNewJobId() throws IOException { return new JobID(getTrackerIdentifier(), nextJobId++); } /** * JobTracker.submitJob() kicks off a new job. * * Create a 'JobInProgress' object, which contains both JobProfile * and JobStatus. Those two sub-objects are sometimes shipped outside * of the JobTracker. But JobInProgress adds info that's useful for * the JobTracker alone. 
*/ public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts) throws IOException { JobInfo jobInfo = null; UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); synchronized (this) { if (jobs.containsKey(jobId)) { // job already running, don't start twice return jobs.get(jobId).getStatus(); } jobInfo = new JobInfo(jobId, new Text(ugi.getShortUserName()), new Path(jobSubmitDir)); } // Create the JobInProgress, do not lock the JobTracker since // we are about to copy job.xml from HDFS JobInProgress job = null; try { job = new JobInProgress(this, this.conf, jobInfo, 0, ts); } catch (Exception e) { throw new IOException(e); } synchronized (this) { // check if queue is RUNNING String queue = job.getProfile().getQueueName(); if (!queueManager.isRunning(queue)) { throw new IOException("Queue \"" + queue + "\" is not running"); } try { aclsManager.checkAccess(job, ugi, Operation.SUBMIT_JOB); } catch (IOException ioe) { LOG.warn("Access denied for user " + job.getJobConf().getUser() + ". Ignoring job " + jobId, ioe); job.fail(); throw ioe; } // Check the job if it cannot run in the cluster because of invalid memory // requirements. try { checkMemoryRequirements(job); } catch (IOException ioe) { throw ioe; } boolean recovered = true; // TODO: Once the Job recovery code is there, // (MAPREDUCE-873) we // must pass the "recovered" flag accurately. 
// This is handled in the trunk/0.22 if (!recovered) { // Store the information in a file so that the job can be recovered // later (if at all) Path jobDir = getSystemDirectoryForJob(jobId); FileSystem.mkdirs(fs, jobDir, new FsPermission(SYSTEM_DIR_PERMISSION)); FSDataOutputStream out = fs.create(getSystemFileForJob(jobId)); jobInfo.write(out); out.close(); } // Submit the job JobStatus status; try { status = addJob(jobId, job); } catch (IOException ioe) { LOG.info("Job " + jobId + " submission failed!", ioe); status = job.getStatus(); status.setFailureInfo(StringUtils.stringifyException(ioe)); failJob(job); throw ioe; } return status; } } /** * @see org.apache.hadoop.mapred.JobSubmissionProtocol#getStagingAreaDir() */ public String getStagingAreaDir() throws IOException { try{ final String user = UserGroupInformation.getCurrentUser().getShortUserName(); return getMROwner().doAs(new PrivilegedExceptionAction<String>() { @Override public String run() throws Exception { return getStagingAreaDirInternal(user); } }); } catch(InterruptedException ie) { throw new IOException(ie); } } private String getStagingAreaDirInternal(String user) throws IOException { final Path stagingRootDir = new Path(conf.get("mapreduce.jobtracker.staging.root.dir", "/tmp/hadoop/mapred/staging")); final FileSystem fs = stagingRootDir.getFileSystem(conf); return fs.makeQualified(new Path(stagingRootDir, user+"/.staging")).toString(); } /** * Adds a job to the jobtracker. Make sure that the checks are inplace before * adding a job. 
This is the core job submission logic * @param jobId The id for the job submitted which needs to be added */ private synchronized JobStatus addJob(JobID jobId, JobInProgress job) throws IOException { totalSubmissions++; synchronized (jobs) { synchronized (taskScheduler) { jobs.put(job.getProfile().getJobID(), job); for (JobInProgressListener listener : jobInProgressListeners) { listener.jobAdded(job); } } } myInstrumentation.submitJob(job.getJobConf(), jobId); job.getQueueMetrics().submitJob(job.getJobConf(), jobId); LOG.info("Job " + jobId + " added successfully for user '" + job.getJobConf().getUser() + "' to queue '" + job.getJobConf().getQueueName() + "'"); AuditLogger.logSuccess(job.getUser(), Operation.SUBMIT_JOB.name(), jobId.toString()); return job.getStatus(); } /** * Are ACLs for authorization checks enabled on the JT? * * @return */ boolean areACLsEnabled() { return conf.getBoolean(JobConf.MR_ACLS_ENABLED, false); } /**@deprecated use {@link #getClusterStatus(boolean)}*/ @Deprecated public synchronized ClusterStatus getClusterStatus() { return getClusterStatus(false); } public synchronized ClusterStatus getClusterStatus(boolean detailed) { synchronized (taskTrackers) { if (detailed) { List<List<String>> trackerNames = taskTrackerNames(); return new ClusterStatus(trackerNames.get(0), trackerNames.get(1), trackerNames.get(2), TASKTRACKER_EXPIRY_INTERVAL, totalMaps, totalReduces, totalMapTaskCapacity, totalReduceTaskCapacity, state, getExcludedNodes().size() ); } else { return new ClusterStatus( // active trackers include graylisted but not blacklisted ones: taskTrackers.size() - getBlacklistedTrackerCount(), getBlacklistedTrackerCount(), getGraylistedTrackerCount(), TASKTRACKER_EXPIRY_INTERVAL, totalMaps, totalReduces, totalMapTaskCapacity, totalReduceTaskCapacity, state, getExcludedNodes().size()); } } } public synchronized ClusterMetrics getClusterMetrics() { return new ClusterMetrics(totalMaps, totalReduces, occupiedMapSlots, occupiedReduceSlots, 
reservedMapSlots, reservedReduceSlots, totalMapTaskCapacity, totalReduceTaskCapacity, totalSubmissions, taskTrackers.size() - getBlacklistedTrackerCount(), getBlacklistedTrackerCount(), getGraylistedTrackerCount(), getExcludedNodes().size()) ; } /** * @see JobSubmissionProtocol#killJob */ public synchronized void killJob(JobID jobid) throws IOException { if (null == jobid) { LOG.info("Null jobid object sent to JobTracker.killJob()"); return; } JobInProgress job = jobs.get(jobid); if (null == job) { LOG.info("killJob(): JobId " + jobid.toString() + " is not a valid job"); return; } // check both queue-level and job-level access aclsManager.checkAccess(job, UserGroupInformation.getCurrentUser(), Operation.KILL_JOB); killJob(job); } private synchronized void killJob(JobInProgress job) { LOG.info("Killing job " + job.getJobID()); JobStatus prevStatus = (JobStatus)job.getStatus().clone(); job.kill(); // Inform the listeners if the job is killed // Note : // If the job is killed in the PREP state then the listeners will be // invoked // If the job is killed in the RUNNING state then cleanup tasks will be // launched and the updateTaskStatuses() will take care of it JobStatus newStatus = (JobStatus)job.getStatus().clone(); if (prevStatus.getRunState() != newStatus.getRunState() && newStatus.getRunState() == JobStatus.KILLED) { JobStatusChangeEvent event = new JobStatusChangeEvent(job, EventType.RUN_STATE_CHANGED, prevStatus, newStatus); updateJobInProgressListeners(event); } } /** * Discard a current delegation token. */ @Override public void cancelDelegationToken(Token<DelegationTokenIdentifier> token ) throws IOException, InterruptedException { String user = UserGroupInformation.getCurrentUser().getUserName(); secretManager.cancelToken(token, user); } /** * Get a new delegation token. 
*/ @Override public Token<DelegationTokenIdentifier> getDelegationToken(Text renewer )throws IOException, InterruptedException { if (!isAllowedDelegationTokenOp()) { throw new IOException( "Delegation Token can be issued only with kerberos authentication"); } UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); Text owner = new Text(ugi.getUserName()); Text realUser = null; if (ugi.getRealUser() != null) { realUser = new Text(ugi.getRealUser().getUserName()); } DelegationTokenIdentifier ident = new DelegationTokenIdentifier(owner, renewer, realUser); return new Token<DelegationTokenIdentifier>(ident, secretManager); } /** * Renew a delegation token to extend its lifetime. */ @Override public long renewDelegationToken(Token<DelegationTokenIdentifier> token ) throws IOException, InterruptedException { if (!isAllowedDelegationTokenOp()) { throw new IOException( "Delegation Token can be issued only with kerberos authentication"); } String user = UserGroupInformation.getCurrentUser().getUserName(); return secretManager.renewToken(token, user); } public void initJob(JobInProgress job) { if (null == job) { LOG.info("Init on null job is not valid"); return; } try { JobStatus prevStatus = (JobStatus)job.getStatus().clone(); LOG.info("Initializing " + job.getJobID()); job.initTasks(); // Inform the listeners if the job state has changed // Note : that the job will be in PREP state. 
JobStatus newStatus = (JobStatus)job.getStatus().clone(); if (prevStatus.getRunState() != newStatus.getRunState()) { JobStatusChangeEvent event = new JobStatusChangeEvent(job, EventType.RUN_STATE_CHANGED, prevStatus, newStatus); synchronized (JobTracker.this) { updateJobInProgressListeners(event); } } } catch (KillInterruptedException kie) { // If job was killed during initialization, job state will be KILLED LOG.error("Job initialization interrupted:\n" + StringUtils.stringifyException(kie)); killJob(job); } catch (Throwable t) { String failureInfo = "Job initialization failed:\n" + StringUtils.stringifyException(t); // If the job initialization is failed, job state will be FAILED LOG.error(failureInfo); job.getStatus().setFailureInfo(failureInfo); failJob(job); } } /** * Fail a job and inform the listeners. Other components in the framework * should use this to fail a job. */ public synchronized void failJob(JobInProgress job) { if (null == job) { LOG.info("Fail on null job is not valid"); return; } JobStatus prevStatus = (JobStatus)job.getStatus().clone(); LOG.info("Failing job " + job.getJobID()); job.fail(); // Inform the listeners if the job state has changed JobStatus newStatus = (JobStatus)job.getStatus().clone(); if (prevStatus.getRunState() != newStatus.getRunState()) { JobStatusChangeEvent event = new JobStatusChangeEvent(job, EventType.RUN_STATE_CHANGED, prevStatus, newStatus); updateJobInProgressListeners(event); } } public synchronized void setJobPriority(JobID jobid, String priority) throws IOException { JobInProgress job = jobs.get(jobid); if (null == job) { LOG.info("setJobPriority(): JobId " + jobid.toString() + " is not a valid job"); return; } JobPriority newPriority = JobPriority.valueOf(priority); setJobPriority(jobid, newPriority); } void storeCompletedJob(JobInProgress job) { //persists the job info in DFS completedJobStatusStore.store(job); } /** * Check if the <code>job</code> has been initialized. 
* * @param job {@link JobInProgress} to be checked * @return <code>true</code> if the job has been initialized, * <code>false</code> otherwise */ private boolean isJobInited(JobInProgress job) { return job.inited(); } public JobProfile getJobProfile(JobID jobid) { synchronized (this) { JobInProgress job = jobs.get(jobid); if (job != null) { // Safe to call JobInProgress.getProfile while holding the lock // on the JobTracker since it isn't a synchronized method return job.getProfile(); } else { RetireJobInfo info = retireJobs.get(jobid); if (info != null) { return info.profile; } } } return completedJobStatusStore.readJobProfile(jobid); } public JobStatus getJobStatus(JobID jobid) { if (null == jobid) { LOG.warn("JobTracker.getJobStatus() cannot get status for null jobid"); return null; } synchronized (this) { JobInProgress job = jobs.get(jobid); if (job != null) { // Safe to call JobInProgress.getStatus while holding the lock // on the JobTracker since it isn't a synchronized method return job.getStatus(); } else { RetireJobInfo info = retireJobs.get(jobid); if (info != null) { return info.status; } } } return completedJobStatusStore.readJobStatus(jobid); } private static final Counters EMPTY_COUNTERS = new Counters(); public Counters getJobCounters(JobID jobid) throws IOException { UserGroupInformation callerUGI = UserGroupInformation.getCurrentUser(); synchronized (this) { JobInProgress job = jobs.get(jobid); if (job != null) { // check the job-access aclsManager.checkAccess(job, callerUGI, Operation.VIEW_JOB_COUNTERS); Counters counters = new Counters(); if (isJobInited(job)) { boolean isFine = job.getCounters(counters); if (!isFine) { throw new IOException("Counters Exceeded limit: " + Counters.MAX_COUNTER_LIMIT); } return counters; } else { return EMPTY_COUNTERS; } } else { RetireJobInfo info = retireJobs.get(jobid); if (info != null) { return info.counters; } } } return completedJobStatusStore.readCounters(jobid); } private static final TaskReport[] 
EMPTY_TASK_REPORTS = new TaskReport[0]; public synchronized TaskReport[] getMapTaskReports(JobID jobid) throws IOException { JobInProgress job = jobs.get(jobid); if (job != null) { // Check authorization aclsManager.checkAccess(job, UserGroupInformation.getCurrentUser(), Operation.VIEW_JOB_DETAILS); } if (job == null || !isJobInited(job)) { return EMPTY_TASK_REPORTS; } else { Vector<TaskReport> reports = new Vector<TaskReport>(); Vector<TaskInProgress> completeMapTasks = job.reportTasksInProgress(true, true); for (Iterator it = completeMapTasks.iterator(); it.hasNext();) { TaskInProgress tip = (TaskInProgress) it.next(); reports.add(tip.generateSingleReport()); } Vector<TaskInProgress> incompleteMapTasks = job.reportTasksInProgress(true, false); for (Iterator it = incompleteMapTasks.iterator(); it.hasNext();) { TaskInProgress tip = (TaskInProgress) it.next(); reports.add(tip.generateSingleReport()); } return reports.toArray(new TaskReport[reports.size()]); } } public synchronized TaskReport[] getReduceTaskReports(JobID jobid) throws IOException { JobInProgress job = jobs.get(jobid); if (job != null) { // Check authorization aclsManager.checkAccess(job, UserGroupInformation.getCurrentUser(), Operation.VIEW_JOB_DETAILS); } if (job == null || !isJobInited(job)) { return EMPTY_TASK_REPORTS; } else { Vector<TaskReport> reports = new Vector<TaskReport>(); Vector completeReduceTasks = job.reportTasksInProgress(false, true); for (Iterator it = completeReduceTasks.iterator(); it.hasNext();) { TaskInProgress tip = (TaskInProgress) it.next(); reports.add(tip.generateSingleReport()); } Vector incompleteReduceTasks = job.reportTasksInProgress(false, false); for (Iterator it = incompleteReduceTasks.iterator(); it.hasNext();) { TaskInProgress tip = (TaskInProgress) it.next(); reports.add(tip.generateSingleReport()); } return reports.toArray(new TaskReport[reports.size()]); } } public synchronized TaskReport[] getCleanupTaskReports(JobID jobid) throws IOException { JobInProgress 
job = jobs.get(jobid); if (job != null) { // Check authorization aclsManager.checkAccess(job, UserGroupInformation.getCurrentUser(), Operation.VIEW_JOB_DETAILS); } if (job == null || !isJobInited(job)) { return EMPTY_TASK_REPORTS; } else { Vector<TaskReport> reports = new Vector<TaskReport>(); Vector<TaskInProgress> completeTasks = job.reportCleanupTIPs(true); for (Iterator<TaskInProgress> it = completeTasks.iterator(); it.hasNext();) { TaskInProgress tip = it.next(); reports.add(tip.generateSingleReport()); } Vector<TaskInProgress> incompleteTasks = job.reportCleanupTIPs(false); for (Iterator<TaskInProgress> it = incompleteTasks.iterator(); it.hasNext();) { TaskInProgress tip = it.next(); reports.add(tip.generateSingleReport()); } return reports.toArray(new TaskReport[reports.size()]); } } public synchronized TaskReport[] getSetupTaskReports(JobID jobid) throws IOException { JobInProgress job = jobs.get(jobid); if (job != null) { // Check authorization aclsManager.checkAccess(job, UserGroupInformation.getCurrentUser(), Operation.VIEW_JOB_DETAILS); } if (job == null || !isJobInited(job)) { return EMPTY_TASK_REPORTS; } else { Vector<TaskReport> reports = new Vector<TaskReport>(); Vector<TaskInProgress> completeTasks = job.reportSetupTIPs(true); for (Iterator<TaskInProgress> it = completeTasks.iterator(); it.hasNext();) { TaskInProgress tip = it.next(); reports.add(tip.generateSingleReport()); } Vector<TaskInProgress> incompleteTasks = job.reportSetupTIPs(false); for (Iterator<TaskInProgress> it = incompleteTasks.iterator(); it.hasNext();) { TaskInProgress tip = it.next(); reports.add(tip.generateSingleReport()); } return reports.toArray(new TaskReport[reports.size()]); } } public static final String MAPRED_CLUSTER_MAP_MEMORY_MB_PROPERTY = "mapred.cluster.map.memory.mb"; public static final String MAPRED_CLUSTER_REDUCE_MEMORY_MB_PROPERTY = "mapred.cluster.reduce.memory.mb"; static final String MAPRED_CLUSTER_MAX_MAP_MEMORY_MB_PROPERTY = 
"mapred.cluster.max.map.memory.mb"; static final String MAPRED_CLUSTER_MAX_REDUCE_MEMORY_MB_PROPERTY = "mapred.cluster.max.reduce.memory.mb"; /* * Returns a list of TaskCompletionEvent for the given job, * starting from fromEventId. * @see org.apache.hadoop.mapred.JobSubmissionProtocol#getTaskCompletionEvents(java.lang.String, int, int) */ public TaskCompletionEvent[] getTaskCompletionEvents( JobID jobid, int fromEventId, int maxEvents) throws IOException{ JobInProgress job = this.jobs.get(jobid); if (null != job) { return isJobInited(job) ? job.getTaskCompletionEvents(fromEventId, maxEvents) : TaskCompletionEvent.EMPTY_ARRAY; } return completedJobStatusStore.readJobTaskCompletionEvents(jobid, fromEventId, maxEvents); } private static final String[] EMPTY_TASK_DIAGNOSTICS = new String[0]; /** * Get the diagnostics for a given task * @param taskId the id of the task * @return an array of the diagnostic messages */ public synchronized String[] getTaskDiagnostics(TaskAttemptID taskId) throws IOException { List<String> taskDiagnosticInfo = null; JobID jobId = taskId.getJobID(); TaskID tipId = taskId.getTaskID(); JobInProgress job = jobs.get(jobId); if (job != null) { // Check authorization aclsManager.checkAccess(job, UserGroupInformation.getCurrentUser(), Operation.VIEW_JOB_DETAILS); } if (job != null && isJobInited(job)) { TaskInProgress tip = job.getTaskInProgress(tipId); if (tip != null) { taskDiagnosticInfo = tip.getDiagnosticInfo(taskId); } } return ((taskDiagnosticInfo == null) ? EMPTY_TASK_DIAGNOSTICS : taskDiagnosticInfo.toArray(new String[taskDiagnosticInfo.size()])); } /** Get all the TaskStatuses from the tipid. */ TaskStatus[] getTaskStatuses(TaskID tipid) { TaskInProgress tip = getTip(tipid); return (tip == null ? new TaskStatus[0] : tip.getTaskStatuses()); } /** Returns the TaskStatus for a particular taskid. */ TaskStatus getTaskStatus(TaskAttemptID taskid) { TaskInProgress tip = getTip(taskid.getTaskID()); return (tip == null ? 
null : tip.getTaskStatus(taskid)); } /** * Returns the counters for the specified task in progress. */ Counters getTipCounters(TaskID tipid) { TaskInProgress tip = getTip(tipid); return (tip == null ? null : tip.getCounters()); } /** * Returns the configured task scheduler for this job tracker. * @return the configured task scheduler */ TaskScheduler getTaskScheduler() { return taskScheduler; } /** * Returns specified TaskInProgress, or null. */ public TaskInProgress getTip(TaskID tipid) { JobInProgress job = jobs.get(tipid.getJobID()); return (job == null ? null : job.getTaskInProgress(tipid)); } /** * @see JobSubmissionProtocol#killTask(TaskAttemptID, boolean) */ public synchronized boolean killTask(TaskAttemptID taskid, boolean shouldFail) throws IOException { TaskInProgress tip = taskidToTIPMap.get(taskid); if(tip != null) { // check both queue-level and job-level access aclsManager.checkAccess(tip.getJob(), UserGroupInformation.getCurrentUser(), shouldFail ? Operation.FAIL_TASK : Operation.KILL_TASK); return tip.killTask(taskid, shouldFail); } else { LOG.info("Kill task attempt failed since task " + taskid + " was not found"); return false; } } /** * Get tracker name for a given task id. 
* @param taskId the name of the task * @return The name of the task tracker */ public synchronized String getAssignedTracker(TaskAttemptID taskId) { return taskidToTrackerMap.get(taskId); } public JobStatus[] jobsToComplete() { return getJobStatus(jobs.values(), true); } /** * @see JobSubmissionProtocol#getAllJobs() */ public JobStatus[] getAllJobs() { List<JobStatus> list = new ArrayList<JobStatus>(); list.addAll(Arrays.asList(getJobStatus(jobs.values(),false))); list.addAll(retireJobs.getAllJobStatus()); return list.toArray(new JobStatus[list.size()]); } /** * @see org.apache.hadoop.mapred.JobSubmissionProtocol#getSystemDir() */ public String getSystemDir() { Path sysDir = new Path(conf.get("mapred.system.dir", "/tmp/hadoop/mapred/system")); return fs.makeQualified(sysDir).toString(); } /** * @see org.apache.hadoop.mapred.JobSubmissionProtocol#getQueueAdmins(String) */ public AccessControlList getQueueAdmins(String queueName) throws IOException { AccessControlList acl = queueManager.getQueueACL(queueName, QueueACL.ADMINISTER_JOBS); if (acl == null) { acl = new AccessControlList(" "); } return acl; } /////////////////////////////////////////////////////////////// // JobTracker methods /////////////////////////////////////////////////////////////// public JobInProgress getJob(JobID jobid) { return jobs.get(jobid); } // Get the job directory in system directory Path getSystemDirectoryForJob(JobID id) { return new Path(getSystemDir(), id.toString()); } //Get the job token file in system directory Path getSystemFileForJob(JobID id) { return new Path(getSystemDirectoryForJob(id)+"/" + JOB_INFO_FILE); } /** * Change the run-time priority of the given job. 
*
 * @param jobId job id
 * @param priority new {@link JobPriority} for the job
 * @throws IOException declared by the signature
 * @throws AccessControlException declared by the signature; the access
 *         check below is the only call that takes the caller's identity
 */
synchronized void setJobPriority(JobID jobId, JobPriority priority)
    throws AccessControlException, IOException {
  JobInProgress job = jobs.get(jobId);
  if (job != null) {
    // check both queue-level and job-level access
    aclsManager.checkAccess(job, UserGroupInformation.getCurrentUser(),
        Operation.SET_JOB_PRIORITY);

    // The status is cloned before and after the change so that the
    // listeners receive both the old and the new priority.
    synchronized (taskScheduler) {
      JobStatus oldStatus = (JobStatus)job.getStatus().clone();
      job.setPriority(priority);
      JobStatus newStatus = (JobStatus)job.getStatus().clone();
      JobStatusChangeEvent event =
          new JobStatusChangeEvent(job, EventType.PRIORITY_CHANGED, oldStatus,
              newStatus);
      updateJobInProgressListeners(event);
    }
  } else {
    LOG.warn("Trying to change the priority of an unknown job: " + jobId);
  }
}

////////////////////////////////////////////////////
// Methods to track all the TaskTrackers
////////////////////////////////////////////////////
/**
 * Accept and process a new TaskTracker profile.  We might
 * have known about the TaskTracker previously, or it might
 * be brand-new.  All task-tracker structures have already
 * been updated.  Just process the contained tasks and any
 * jobs that might be affected.
 *
 * <p>For every task report in the status: reports for unknown jobs put the
 * job on the tracker's job-cleanup set, reports for uninitialized jobs put
 * the attempt on the tracker's task-cleanup set, and all other reports
 * update the owning job and are then scanned for failed-fetch notifications.
 *
 * @param status the status received from a task tracker
 */
void updateTaskStatuses(TaskTrackerStatus status) {
  String trackerName = status.getTrackerName();
  for (TaskStatus report : status.getTaskReports()) {
    report.setTaskTracker(trackerName);
    TaskAttemptID taskId = report.getTaskID();

    // expire it
    expireLaunchingTasks.removeTask(taskId);

    JobInProgress job = getJob(taskId.getJobID());
    if (job == null) {
      // if job is not there in the cleanup list ... add it
      synchronized (trackerToJobsToCleanup) {
        // NOTE: this local deliberately or accidentally shadows the
        // JobTracker's own 'jobs' map inside this block.
        Set<JobID> jobs = trackerToJobsToCleanup.get(trackerName);
        if (jobs == null) {
          jobs = new HashSet<JobID>();
          trackerToJobsToCleanup.put(trackerName, jobs);
        }
        jobs.add(taskId.getJobID());
      }
      continue;
    }

    if (!job.inited()) {
      // if job is not yet initialized ... kill the attempt
      synchronized (trackerToTasksToCleanup) {
        Set<TaskAttemptID> tasks = trackerToTasksToCleanup.get(trackerName);
        if (tasks == null) {
          tasks = new HashSet<TaskAttemptID>();
          trackerToTasksToCleanup.put(trackerName, tasks);
        }
        tasks.add(taskId);
      }
      continue;
    }

    TaskInProgress tip = taskidToTIPMap.get(taskId);
    // Check if the tip is known to the jobtracker. In case of a restarted
    // jt, some tasks might join in later
    if (tip != null || hasRestarted()) {
      if (tip == null) {
        // Restarted JobTracker: attach the reported attempt to its tip.
        tip = job.getTaskInProgress(taskId.getTaskID());
        job.addRunningTaskToTIP(tip, taskId, status, false);
      }

      // Update the job and inform the listeners if necessary
      JobStatus prevStatus = (JobStatus)job.getStatus().clone();
      // Clone TaskStatus object here, because JobInProgress
      // or TaskInProgress can modify this object and
      // the changes should not get reflected in TaskTrackerStatus.
      // An old TaskTrackerStatus is used later in countMapTasks, etc.
      job.updateTaskStatus(tip, (TaskStatus)report.clone());
      JobStatus newStatus = (JobStatus)job.getStatus().clone();

      // Update the listeners if an incomplete job completes
      if (prevStatus.getRunState() != newStatus.getRunState()) {
        JobStatusChangeEvent event =
            new JobStatusChangeEvent(job, EventType.RUN_STATE_CHANGED,
                prevStatus, newStatus);
        updateJobInProgressListeners(event);
      }
    } else {
      LOG.info("Serious problem. While updating status, cannot find taskid "
          + report.getTaskID());
    }

    // Process 'failed fetch' notifications
    List<TaskAttemptID> failedFetchMaps = report.getFetchFailedMaps();
    if (failedFetchMaps != null) {
      for (TaskAttemptID mapTaskId : failedFetchMaps) {
        TaskInProgress failedFetchMap = taskidToTIPMap.get(mapTaskId);

        if (failedFetchMap != null) {
          // Gather information about the map which has to be failed, if need be
          String failedFetchTrackerName = getAssignedTracker(mapTaskId);
          if (failedFetchTrackerName == null) {
            // No tracker is recorded for the map attempt any more.
            failedFetchTrackerName = "Lost task tracker";
          }
          failedFetchMap.getJob().fetchFailureNotification(failedFetchMap,
              mapTaskId, failedFetchTrackerName, taskId, trackerName);
        }
      }
    }
  }
}

/**
 * We lost the task tracker!  All task-tracker structures have
 * already been updated.  Just process the contained tasks and any
 * jobs that might be affected.
 *
 * @param taskTracker the tracker that was lost
 */
void lostTaskTracker(TaskTracker taskTracker) {
  String trackerName = taskTracker.getTrackerName();
  LOG.info("Lost tracker '" + trackerName + "'");

  // remove the tracker from the local structures
  synchronized (trackerToJobsToCleanup) {
    trackerToJobsToCleanup.remove(trackerName);
  }

  synchronized (trackerToTasksToCleanup) {
    trackerToTasksToCleanup.remove(trackerName);
  }

  // Inform the recovery manager
  recoveryManager.unMarkTracker(trackerName);

  Set<TaskAttemptID> lostTasks = trackerToTaskMap.get(trackerName);
  trackerToTaskMap.remove(trackerName);

  if (lostTasks != null) {
    // List of jobs which had any of their tasks fail on this tracker
    Set<JobInProgress> jobsWithFailures = new HashSet<JobInProgress>();
    for (TaskAttemptID taskId : lostTasks) {
      // NOTE(review): tip is dereferenced without a null check; this assumes
      // every attempt in trackerToTaskMap is also in taskidToTIPMap - confirm.
      TaskInProgress tip = taskidToTIPMap.get(taskId);
      JobInProgress job = tip.getJob();

      // Completed reduce tasks never need to be failed, because
      // their outputs go to dfs
      // And completed maps with zero reducers of the job
      // never need to be failed.
      if (!tip.isComplete() ||
          (tip.isMapTask() && !tip.isJobSetupTask() &&
           job.desiredReduces() != 0)) {
        // if the job is done, we don't want to change anything
        if (job.getStatus().getRunState() == JobStatus.RUNNING ||
            job.getStatus().getRunState() == JobStatus.PREP) {
          // the state will be KILLED_UNCLEAN, if the task(map or reduce)
          // was RUNNING on the tracker
          TaskStatus.State killState = (tip.isRunningTask(taskId) &&
              !tip.isJobSetupTask() && !tip.isJobCleanupTask()) ?
              TaskStatus.State.KILLED_UNCLEAN :
              TaskStatus.State.KILLED;
          job.failedTask(tip, taskId,
              ("Lost task tracker: " + trackerName),
              (tip.isMapTask() ?
                  TaskStatus.Phase.MAP :
                  TaskStatus.Phase.REDUCE),
              killState,
              trackerName);
          jobsWithFailures.add(job);
        }
      } else {
        // Completed 'reduce' task and completed 'maps' with zero
        // reducers of the job, not failed;
        // only removed from data-structures.
        markCompletedTaskAttempt(trackerName, taskId);
      }
    }

    // Penalize this tracker for each of the jobs which
    // had any tasks running on it when it was 'lost'
    // Also, remove any reserved slots on this tasktracker
    for (JobInProgress job : jobsWithFailures) {
      job.addTrackerTaskFailure(trackerName, taskTracker);
    }

    // Cleanup
    taskTracker.cancelAllReservations();

    // Purge 'marked' tasks, needs to be done
    // here to prevent hanging references!
    removeMarkedTasks(trackerName);
  }
}

/**
 * Rereads the config to get hosts and exclude list file names.
 * Rereads the files to update the hosts and exclude lists.
*/ public synchronized void refreshNodes() throws IOException { String user = UserGroupInformation.getCurrentUser().getShortUserName(); // check access if (!aclsManager.isMRAdmin(UserGroupInformation.getCurrentUser())) { AuditLogger.logFailure(user, Constants.REFRESH_NODES, aclsManager.getAdminsAcl().toString(), Constants.JOBTRACKER, Constants.UNAUTHORIZED_USER); throw new AccessControlException(user + " is not authorized to refresh nodes."); } AuditLogger.logSuccess(user, Constants.REFRESH_NODES, Constants.JOBTRACKER); // call the actual api refreshHosts(); } UserGroupInformation getMROwner() { return aclsManager.getMROwner(); } private synchronized void refreshHosts() throws IOException { // Reread the config to get mapred.hosts and mapred.hosts.exclude filenames. // Update the file names and refresh internal includes and excludes list LOG.info("Refreshing hosts information"); Configuration conf = new Configuration(); hostsReader.updateFileNames(conf.get("mapred.hosts",""), conf.get("mapred.hosts.exclude", "")); hostsReader.refresh(); Set<String> excludeSet = new HashSet<String>(); for(Map.Entry<String, TaskTracker> eSet : taskTrackers.entrySet()) { String trackerName = eSet.getKey(); TaskTrackerStatus status = eSet.getValue().getStatus(); // Check if not include i.e not in host list or in hosts list but excluded if (!inHostsList(status) || inExcludedHostsList(status)) { excludeSet.add(status.getHost()); // add to rejected trackers } } decommissionNodes(excludeSet); } // Assumes JobTracker, taskTrackers and trackerExpiryQueue is locked on entry // Remove a tracker from the system private void removeTracker(TaskTracker tracker) { String trackerName = tracker.getTrackerName(); String hostName = JobInProgress.convertTrackerNameToHostName(trackerName); // Remove completely after marking the tasks as 'KILLED' lostTaskTracker(tracker); // tracker is lost; if it is blacklisted and/or graylisted, remove // it from the relevant count(s) of trackers in the cluster if 
(isBlacklisted(trackerName)) { LOG.info("Removing " + hostName + " from blacklist"); faultyTrackers.decrBlacklistedTrackers(1); } if (isGraylisted(trackerName)) { LOG.info("Removing " + hostName + " from graylist"); faultyTrackers.decrGraylistedTrackers(1); } updateTaskTrackerStatus(trackerName, null); statistics.taskTrackerRemoved(trackerName); getInstrumentation().decTrackers(1); } // main decommission synchronized void decommissionNodes(Set<String> hosts) throws IOException { LOG.info("Decommissioning " + hosts.size() + " nodes"); // create a list of tracker hostnames synchronized (taskTrackers) { synchronized (trackerExpiryQueue) { int trackersDecommissioned = 0; for (String host : hosts) { LOG.info("Decommissioning host " + host); Set<TaskTracker> trackers = hostnameToTaskTracker.remove(host); if (trackers != null) { for (TaskTracker tracker : trackers) { LOG.info("Decommission: Losing tracker " + tracker.getTrackerName() + " on host " + host); removeTracker(tracker); } trackersDecommissioned += trackers.size(); } LOG.info("Host " + host + " is ready for decommissioning"); } getInstrumentation().setDecommissionedTrackers(trackersDecommissioned); } } } /** * Returns a set of excluded nodes. */ Collection<String> getExcludedNodes() { return hostsReader.getExcludedHosts(); } /** * Get the localized job file path on the job trackers local file system * @param jobId id of the job * @return the path of the job conf file on the local file system */ public static String getLocalJobFilePath(JobID jobId){ return JobHistory.JobInfo.getLocalJobFilePath(jobId); } //////////////////////////////////////////////////////////// // main() //////////////////////////////////////////////////////////// /** * Start the JobTracker process. This is used only for debugging. As a rule, * JobTracker should be run as part of the DFS Namenode process. 
*/ public static void main(String argv[] ) throws IOException, InterruptedException { StringUtils.startupShutdownMessage(JobTracker.class, argv, LOG); try { if(argv.length == 0) { JobTracker tracker = startTracker(new JobConf()); tracker.offerService(); } else { if ("-dumpConfiguration".equals(argv[0]) && argv.length == 1) { dumpConfiguration(new PrintWriter(System.out)); } else { System.out.println("usage: JobTracker [-dumpConfiguration]"); System.exit(-1); } } } catch (Throwable e) { LOG.fatal(StringUtils.stringifyException(e)); System.exit(-1); } } /** * Dumps the configuration properties in Json format * @param writer {@link}Writer object to which the output is written * @throws IOException */ private static void dumpConfiguration(Writer writer) throws IOException { Configuration.dumpConfiguration(new JobConf(), writer); writer.write("\n"); // get the QueueManager configuration properties QueueManager.dumpConfiguration(writer); writer.write("\n"); } @Override public JobQueueInfo[] getQueues() throws IOException { return queueManager.getJobQueueInfos(); } @Override public JobQueueInfo getQueueInfo(String queue) throws IOException { return queueManager.getJobQueueInfo(queue); } @Override public JobStatus[] getJobsFromQueue(String queue) throws IOException { Collection<JobInProgress> jips = taskScheduler.getJobs(queue); return getJobStatus(jips,false); } @Override public QueueAclsInfo[] getQueueAclsForCurrentUser() throws IOException{ return queueManager.getQueueAcls( UserGroupInformation.getCurrentUser()); } private synchronized JobStatus[] getJobStatus(Collection<JobInProgress> jips, boolean toComplete) { if(jips == null || jips.isEmpty()) { return new JobStatus[]{}; } ArrayList<JobStatus> jobStatusList = new ArrayList<JobStatus>(); for(JobInProgress jip : jips) { JobStatus status = jip.getStatus(); status.setStartTime(jip.getStartTime()); status.setUsername(jip.getProfile().getUser()); if(toComplete) { if(status.getRunState() == JobStatus.RUNNING || 
status.getRunState() == JobStatus.PREP) { jobStatusList.add(status); } }else { jobStatusList.add(status); } } return jobStatusList.toArray( new JobStatus[jobStatusList.size()]); } /** * Returns the confgiured maximum number of tasks for a single job */ int getMaxTasksPerJob() { return conf.getInt("mapred.jobtracker.maxtasks.per.job", -1); } @Override public void refreshServiceAcl() throws IOException { if (!conf.getBoolean( ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) { throw new AuthorizationException("Service Level Authorization not enabled!"); } ServiceAuthorizationManager.refresh(conf, new MapReducePolicyProvider()); } private void initializeTaskMemoryRelatedConfig() { memSizeForMapSlotOnJT = JobConf.normalizeMemoryConfigValue(conf.getLong( JobTracker.MAPRED_CLUSTER_MAP_MEMORY_MB_PROPERTY, JobConf.DISABLED_MEMORY_LIMIT)); memSizeForReduceSlotOnJT = JobConf.normalizeMemoryConfigValue(conf.getLong( JobTracker.MAPRED_CLUSTER_REDUCE_MEMORY_MB_PROPERTY, JobConf.DISABLED_MEMORY_LIMIT)); if (conf.get(JobConf.UPPER_LIMIT_ON_TASK_VMEM_PROPERTY) != null) { LOG.warn( JobConf.deprecatedString( JobConf.UPPER_LIMIT_ON_TASK_VMEM_PROPERTY)+ " instead use "+JobTracker.MAPRED_CLUSTER_MAX_MAP_MEMORY_MB_PROPERTY+ " and " + JobTracker.MAPRED_CLUSTER_MAX_REDUCE_MEMORY_MB_PROPERTY ); limitMaxMemForMapTasks = limitMaxMemForReduceTasks = JobConf.normalizeMemoryConfigValue( conf.getLong( JobConf.UPPER_LIMIT_ON_TASK_VMEM_PROPERTY, JobConf.DISABLED_MEMORY_LIMIT)); if (limitMaxMemForMapTasks != JobConf.DISABLED_MEMORY_LIMIT && limitMaxMemForMapTasks >= 0) { limitMaxMemForMapTasks = limitMaxMemForReduceTasks = limitMaxMemForMapTasks / (1024 * 1024); //Converting old values in bytes to MB } } else { limitMaxMemForMapTasks = JobConf.normalizeMemoryConfigValue( conf.getLong( JobTracker.MAPRED_CLUSTER_MAX_MAP_MEMORY_MB_PROPERTY, JobConf.DISABLED_MEMORY_LIMIT)); limitMaxMemForReduceTasks = JobConf.normalizeMemoryConfigValue( conf.getLong( 
JobTracker.MAPRED_CLUSTER_MAX_REDUCE_MEMORY_MB_PROPERTY, JobConf.DISABLED_MEMORY_LIMIT)); } LOG.info(new StringBuilder().append("Scheduler configured with ").append( "(memSizeForMapSlotOnJT, memSizeForReduceSlotOnJT,").append( " limitMaxMemForMapTasks, limitMaxMemForReduceTasks) (").append( memSizeForMapSlotOnJT).append(", ").append(memSizeForReduceSlotOnJT) .append(", ").append(limitMaxMemForMapTasks).append(", ").append( limitMaxMemForReduceTasks).append(")")); } @Override public void refreshSuperUserGroupsConfiguration() { LOG.info("Refreshing superuser proxy groups mapping "); ProxyUsers.refreshSuperUserGroupsConfiguration(); } @Override public void refreshUserToGroupsMappings() throws IOException { LOG.info("Refreshing all user-to-groups mappings. Requested by user: " + UserGroupInformation.getCurrentUser().getShortUserName()); Groups.getUserToGroupsMappingService().refresh(); } private boolean perTaskMemoryConfigurationSetOnJT() { if (limitMaxMemForMapTasks == JobConf.DISABLED_MEMORY_LIMIT || limitMaxMemForReduceTasks == JobConf.DISABLED_MEMORY_LIMIT || memSizeForMapSlotOnJT == JobConf.DISABLED_MEMORY_LIMIT || memSizeForReduceSlotOnJT == JobConf.DISABLED_MEMORY_LIMIT) { return false; } return true; } /** * Check the job if it has invalid requirements and throw and IOException if does have. * * @param job * @throws IOException */ private void checkMemoryRequirements(JobInProgress job) throws IOException { if (!perTaskMemoryConfigurationSetOnJT()) { LOG.debug("Per-Task memory configuration is not set on JT. 
" + "Not checking the job for invalid memory requirements."); return; } boolean invalidJob = false; String msg = ""; long maxMemForMapTask = job.getMemoryForMapTask(); long maxMemForReduceTask = job.getMemoryForReduceTask(); if (maxMemForMapTask == JobConf.DISABLED_MEMORY_LIMIT || maxMemForReduceTask == JobConf.DISABLED_MEMORY_LIMIT) { invalidJob = true; msg = "Invalid job requirements."; } if (maxMemForMapTask > limitMaxMemForMapTasks || maxMemForReduceTask > limitMaxMemForReduceTasks) { invalidJob = true; msg = "Exceeds the cluster's max-memory-limit."; } if (invalidJob) { StringBuilder jobStr = new StringBuilder().append(job.getJobID().toString()).append("(") .append(maxMemForMapTask).append(" memForMapTasks ").append( maxMemForReduceTask).append(" memForReduceTasks): "); LOG.warn(jobStr.toString() + msg); throw new IOException(jobStr.toString() + msg); } } @Override public void refreshQueues() throws IOException { LOG.info("Refreshing queue information. requested by : " + UserGroupInformation.getCurrentUser().getShortUserName()); this.queueManager.refreshQueues(new Configuration()); synchronized (taskScheduler) { taskScheduler.refresh(); } } // used by the web UI (machines.jsp) public String getReasonsForBlacklisting(String host) { return getReasonsForBlackOrGraylisting(host, false); } public String getReasonsForGraylisting(String host) { return getReasonsForBlackOrGraylisting(host, true); } synchronized private String getReasonsForBlackOrGraylisting(String host, boolean gray) { FaultInfo fi = faultyTrackers.getFaultInfo(host, gray); if (fi == null) { return ""; } return fi.getTrackerBlackOrGraylistReport(gray); } /** Test Methods */ synchronized Set<ReasonForBlackListing> getReasonForBlackList(String host) { FaultInfo fi = faultyTrackers.getFaultInfo(host, false); if (fi == null) { return new HashSet<ReasonForBlackListing>(); } return fi.getReasonForBlacklisting(false); } /** * * @return true if delegation token operation is allowed */ private boolean 
isAllowedDelegationTokenOp() throws IOException { AuthenticationMethod authMethod = getConnectionAuthenticationMethod(); if (UserGroupInformation.isSecurityEnabled() && (authMethod != AuthenticationMethod.KERBEROS) && (authMethod != AuthenticationMethod.KERBEROS_SSL) && (authMethod != AuthenticationMethod.CERTIFICATE)) { return false; } return true; } /** * Returns authentication method used to establish the connection * @return AuthenticationMethod used to establish connection * @throws IOException */ private AuthenticationMethod getConnectionAuthenticationMethod() throws IOException { UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); AuthenticationMethod authMethod = ugi.getAuthenticationMethod(); if (authMethod == AuthenticationMethod.PROXY) { authMethod = ugi.getRealUser().getAuthenticationMethod(); } return authMethod; } JobACLsManager getJobACLsManager() { return aclsManager.getJobACLsManager(); } ACLsManager getACLsManager() { return aclsManager; } // Begin MXBean implementation @Override public String getHostname() { return StringUtils.simpleHostname(getJobTrackerMachine()); } @Override public String getVersion() { return VersionInfo.getVersion() +", r"+ VersionInfo.getRevision(); } @Override public String getConfigVersion() { return conf.get(CONF_VERSION_KEY, CONF_VERSION_DEFAULT); } @Override public int getThreadCount() { return ManagementFactory.getThreadMXBean().getThreadCount(); } @Override public String getSummaryJson() { return getSummary().toJson(); } InfoMap getSummary() { final ClusterMetrics metrics = getClusterMetrics(); InfoMap map = new InfoMap(); map.put("nodes", metrics.getTaskTrackerCount() + getBlacklistedTrackerCount()); map.put("alive", metrics.getTaskTrackerCount()); map.put("blacklisted", getBlacklistedTrackerCount()); map.put("graylisted", getGraylistedTrackerCount()); map.put("slots", new InfoMap() {{ put("map_slots", metrics.getMapSlotCapacity()); put("map_slots_used", metrics.getOccupiedMapSlots()); 
put("reduce_slots", metrics.getReduceSlotCapacity()); put("reduce_slots_used", metrics.getOccupiedReduceSlots()); }}); map.put("jobs", metrics.getTotalJobSubmissions()); return map; } @Override public String getAliveNodesInfoJson() { return JSON.toString(getAliveNodesInfo()); } List<InfoMap> getAliveNodesInfo() { List<InfoMap> info = new ArrayList<InfoMap>(); for (final TaskTrackerStatus tts : activeTaskTrackers()) { final int mapSlots = tts.getMaxMapSlots(); final int redSlots = tts.getMaxReduceSlots(); info.add(new InfoMap() {{ put("hostname", tts.getHost()); put("last_seen", tts.getLastSeen()); put("health", tts.getHealthStatus().isNodeHealthy() ? "OK" : ""); put("slots", new InfoMap() {{ put("map_slots", mapSlots); put("map_slots_used", mapSlots - tts.getAvailableMapSlots()); put("reduce_slots", redSlots); put("reduce_slots_used", redSlots - tts.getAvailableReduceSlots()); }}); put("failures", tts.getFailures()); }}); } return info; } @Override public String getBlacklistedNodesInfoJson() { return JSON.toString(getUnhealthyNodesInfo(blacklistedTaskTrackers())); } @Override public String getGraylistedNodesInfoJson() { return JSON.toString(getUnhealthyNodesInfo(graylistedTaskTrackers())); } List<InfoMap> getUnhealthyNodesInfo(Collection<TaskTrackerStatus> list) { List<InfoMap> info = new ArrayList<InfoMap>(); for (final TaskTrackerStatus tts : list) { info.add(new InfoMap() {{ put("hostname", tts.getHost()); put("last_seen", tts.getLastSeen()); put("reason", tts.getHealthStatus().getHealthReport()); }}); } return info; } @Override public String getQueueInfoJson() { return getQueueInfo().toJson(); } InfoMap getQueueInfo() { InfoMap map = new InfoMap(); try { for (final JobQueueInfo q : getQueues()) { map.put(q.getQueueName(), new InfoMap() {{ put("state", q.getQueueState()); put("info", q.getSchedulingInfo()); }}); } } catch (Exception e) { throw new RuntimeException("Getting queue info", e); } return map; } // End MXbean implementaiton }