/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import java.util.List; import org.apache.hadoop.mapred.Counters.Group; import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.metrics.MetricsContext; import org.apache.hadoop.metrics.MetricsRecord; import org.apache.hadoop.metrics.MetricsUtil; import org.apache.hadoop.metrics.Updater; import org.apache.hadoop.metrics.jvm.JvmMetrics; class JobTrackerMetricsInst extends JobTrackerInstrumentation implements Updater { private final MetricsRecord metricsRecord; private int numMapTasksLaunched = 0; private int numMapTasksCompleted = 0; private int numMapTasksFailed = 0; private int numReduceTasksLaunched = 0; private int numReduceTasksCompleted = 0; private int numReduceTasksFailed = 0; private int numJobsSubmitted = 0; private int numJobsCompleted = 0; private int numWaitingMaps = 0; private int numWaitingReduces = 0; private int numSpeculativeMaps = 0; private int numSpeculativeReduces = 0; private int numSpeculativeSucceededMaps = 0; private int numSpeculativeSucceededReduces = 0; private int numSpeculativeWasteMaps = 0; private int numSpeculativeWasteReduces = 0; private int numDataLocalMaps = 0; private int numRackLocalMaps = 0; private long killedMapTime = 0L; private long killedReduceTime = 0L; private long failedMapTime = 0L; private long failedReduceTime = 0L; private long speculativeMapTimeWaste = 0L; private long speculativeReduceTimeWaste = 0L; private final Counters countersToMetrics = new Counters(); //Cluster status fields. private volatile int numMapSlots = 0; private volatile int numReduceSlots = 0; private int numBlackListedMapSlots = 0; private int numBlackListedReduceSlots = 0; private int numReservedMapSlots = 0; private int numReservedReduceSlots = 0; private int numOccupiedMapSlots = 0; private int numOccupiedReduceSlots = 0; private int numJobsFailed = 0; private int numJobsKilled = 0; private int numJobsPreparing = 0; private int numJobsRunning = 0; private int numRunningMaps = 0; private int numRunningReduces = 0; private int numMapTasksKilled = 0; private int numReduceTasksKilled = 0; private int numTrackers = 0; private int numTrackersBlackListed = 0; private int numTrackersDecommissioned = 0; private int numTrackersExcluded = 0; private int numTrackersDead = 0; private int numTasksInMemory = 0; //Extended JobTracker Metrics private long totalSubmitTime = 0; private long numJobsLaunched = 0; private long totalMapInputBytes = 0; private long localMapInputBytes = 0; private long rackMapInputBytes = 0; public JobTrackerMetricsInst(JobTracker tracker, JobConf conf) { super(tracker, conf); String sessionId = conf.getSessionId(); // Initiate JVM Metrics JvmMetrics.init("JobTracker", sessionId); // Create a record for map-reduce metrics MetricsContext context = MetricsUtil.getContext("mapred"); metricsRecord = MetricsUtil.createRecord(context, "jobtracker"); metricsRecord.setTag("sessionId", sessionId); context.registerUpdater(this); } /** * Since this object is a registered updater, this method will be called * periodically, e.g. every 5 seconds. */ public void doUpdates(MetricsContext unused) { // In case of running in LocalMode tracker == null if (tracker != null) { synchronized (tracker) { synchronized (this) { numRunningMaps = 0; numRunningReduces = 0; numWaitingMaps = 0; numWaitingReduces = 0; numTasksInMemory = 0; List<JobInProgress> jobs = tracker.getRunningJobs(); for (JobInProgress jip : jobs) { for (TaskInProgress tip : jip.maps) { if (tip.isRunning()) { numRunningMaps++; } else if (tip.isRunnable()) { numWaitingMaps++; } } for (TaskInProgress tip : jip.reduces) { if (tip.isRunning()) { numRunningReduces++; } else if (tip.isRunnable()) { numWaitingReduces++; } } numTasksInMemory += jip.getTasks(TaskType.MAP).length; numTasksInMemory += jip.getTasks(TaskType.REDUCE).length; } // Get tracker metrics numTrackersDead = tracker.getDeadNodes().size(); ClusterStatus cs = tracker.getClusterStatus(false); numTrackersExcluded = cs.getNumExcludedNodes(); } } } synchronized (this) { metricsRecord.setMetric("map_slots", numMapSlots); metricsRecord.setMetric("reduce_slots", numReduceSlots); metricsRecord.incrMetric("blacklisted_maps", numBlackListedMapSlots); metricsRecord.incrMetric("blacklisted_reduces", numBlackListedReduceSlots); metricsRecord.incrMetric("maps_launched", numMapTasksLaunched); metricsRecord.incrMetric("maps_completed", numMapTasksCompleted); metricsRecord.incrMetric("maps_failed", numMapTasksFailed); metricsRecord.incrMetric("reduces_launched", numReduceTasksLaunched); metricsRecord.incrMetric("reduces_completed", numReduceTasksCompleted); metricsRecord.incrMetric("reduces_failed", numReduceTasksFailed); metricsRecord.incrMetric("jobs_submitted", numJobsSubmitted); metricsRecord.incrMetric("jobs_completed", numJobsCompleted); metricsRecord.setMetric("waiting_maps", numWaitingMaps); metricsRecord.setMetric("waiting_reduces", numWaitingReduces); metricsRecord.incrMetric("num_speculative_maps", numSpeculativeMaps); metricsRecord.incrMetric("num_speculative_reduces", numSpeculativeReduces); metricsRecord.incrMetric("num_speculative_succeeded_maps", numSpeculativeSucceededMaps); metricsRecord.incrMetric("num_speculative_succeeded_reduces", numSpeculativeSucceededReduces); metricsRecord.incrMetric("num_speculative_wasted_maps", numSpeculativeWasteMaps); metricsRecord.incrMetric("num_speculative_wasted_reduces", numSpeculativeWasteReduces); metricsRecord.incrMetric("speculative_map_time_waste", speculativeMapTimeWaste); metricsRecord.incrMetric("speculative_reduce_time_waste", speculativeReduceTimeWaste); metricsRecord.incrMetric("killed_tasks_map_time", killedMapTime); metricsRecord.incrMetric("killed_tasks_reduce_time", killedReduceTime); metricsRecord.incrMetric("failed_tasks_map_time", failedMapTime); metricsRecord.incrMetric("failed_tasks_reduce_time", failedReduceTime); metricsRecord.incrMetric("num_dataLocal_maps", numDataLocalMaps); metricsRecord.incrMetric("num_rackLocal_maps", numRackLocalMaps); metricsRecord.incrMetric("reserved_map_slots", numReservedMapSlots); metricsRecord.incrMetric("reserved_reduce_slots", numReservedReduceSlots); metricsRecord.incrMetric("occupied_map_slots", numOccupiedMapSlots); metricsRecord.incrMetric("occupied_reduce_slots", numOccupiedReduceSlots); metricsRecord.incrMetric("jobs_failed", numJobsFailed); metricsRecord.incrMetric("jobs_killed", numJobsKilled); metricsRecord.incrMetric("jobs_preparing", numJobsPreparing); metricsRecord.incrMetric("jobs_running", numJobsRunning); metricsRecord.setMetric("running_maps", numRunningMaps); metricsRecord.setMetric("running_reduces", numRunningReduces); metricsRecord.setMetric("num_tasks_in_memory", numTasksInMemory); metricsRecord.incrMetric("maps_killed", numMapTasksKilled); metricsRecord.incrMetric("reduces_killed", numReduceTasksKilled); metricsRecord.setMetric("trackers", numTrackers); metricsRecord.setMetric("trackers_blacklisted", numTrackersBlackListed); metricsRecord.setMetric("trackers_decommissioned", numTrackersDecommissioned); metricsRecord.setMetric("trackers_excluded", numTrackersExcluded); metricsRecord.setMetric("trackers_dead", numTrackersDead); metricsRecord.incrMetric("num_launched_jobs", numJobsLaunched); metricsRecord.incrMetric("total_submit_time", totalSubmitTime); metricsRecord.incrMetric("total_map_input_bytes", totalMapInputBytes); metricsRecord.incrMetric("local_map_input_bytes", localMapInputBytes); metricsRecord.incrMetric("rack_map_input_bytes", rackMapInputBytes); for (Group group: countersToMetrics) { String groupName = group.getName(); for (Counter counter : group) { String name = groupName + "_" + counter.getName(); name = name.replaceAll("[^a-zA-Z_]", "_").toLowerCase(); metricsRecord.incrMetric(name, counter.getValue()); } } clearCounters(); numMapTasksLaunched = 0; numMapTasksCompleted = 0; numMapTasksFailed = 0; numReduceTasksLaunched = 0; numReduceTasksCompleted = 0; numReduceTasksFailed = 0; numJobsSubmitted = 0; numJobsCompleted = 0; numWaitingMaps = 0; numWaitingReduces = 0; numBlackListedMapSlots = 0; numBlackListedReduceSlots = 0; numSpeculativeMaps = 0; numSpeculativeReduces = 0; numSpeculativeSucceededMaps = 0; numSpeculativeSucceededReduces = 0; numSpeculativeWasteMaps = 0; numSpeculativeWasteReduces = 0; speculativeMapTimeWaste = 0L; speculativeReduceTimeWaste = 0L; killedMapTime = 0; killedReduceTime = 0; failedMapTime = 0; failedReduceTime = 0; numDataLocalMaps = 0; numRackLocalMaps = 0; numReservedMapSlots = 0; numReservedReduceSlots = 0; numOccupiedMapSlots = 0; numOccupiedReduceSlots = 0; numJobsFailed = 0; numJobsKilled = 0; numJobsPreparing = 0; numJobsRunning = 0; numRunningMaps = 0; numRunningReduces = 0; numMapTasksKilled = 0; numReduceTasksKilled = 0; numTrackers = 0; numTrackersBlackListed = 0; totalSubmitTime = 0; numJobsLaunched = 0; totalMapInputBytes = 0; localMapInputBytes = 0; rackMapInputBytes = 0; } metricsRecord.update(); } @Override public synchronized void launchMap(TaskAttemptID taskAttemptID) { ++numMapTasksLaunched; decWaitingMaps(taskAttemptID.getJobID(), 1); } @Override public synchronized void launchDataLocalMap(TaskAttemptID taskAttemptID) { ++numDataLocalMaps; } @Override public synchronized void launchRackLocalMap(TaskAttemptID taskAttemptID) { ++numRackLocalMaps; } @Override public synchronized void completeMap(TaskAttemptID taskAttemptID) { ++numMapTasksCompleted; } @Override public synchronized void speculateMap(TaskAttemptID taskAttemptID) { ++numSpeculativeMaps; } public synchronized void speculativeSucceededMap( TaskAttemptID taskAttemptID) { ++numSpeculativeSucceededMaps; } public synchronized void speculativeSucceededReduce( TaskAttemptID taskAttemptID) { ++numSpeculativeSucceededReduces; } @Override public synchronized void failedMap(TaskAttemptID taskAttemptID, boolean wasFailed, boolean isSpeculative, long taskStartTime) { long timeSpent = JobTracker.getClock().getTime() - taskStartTime; if (wasFailed) { ++numMapTasksFailed; failedMapTime += timeSpent; } else { ++numMapTasksKilled; killedMapTime += timeSpent; if (isSpeculative) { ++numSpeculativeWasteMaps; speculativeMapTimeWaste += timeSpent; } } addWaitingMaps(taskAttemptID.getJobID(), 1); } @Override public synchronized void launchReduce(TaskAttemptID taskAttemptID) { ++numReduceTasksLaunched; decWaitingReduces(taskAttemptID.getJobID(), 1); } @Override public synchronized void completeReduce(TaskAttemptID taskAttemptID) { ++numReduceTasksCompleted; } @Override public synchronized void speculateReduce(TaskAttemptID taskAttemptID) { ++numSpeculativeReduces; } @Override public synchronized void failedReduce(TaskAttemptID taskAttemptID, boolean wasFailed, boolean isSpeculative, long taskStartTime) { long timeSpent = JobTracker.getClock().getTime() - taskStartTime; if (wasFailed) { ++numReduceTasksFailed; failedReduceTime += timeSpent; } else { ++numReduceTasksKilled; failedReduceTime += timeSpent; if (isSpeculative) { ++numSpeculativeWasteReduces; speculativeReduceTimeWaste += timeSpent; } } addWaitingReduces(taskAttemptID.getJobID(), 1); } @Override public synchronized void submitJob(JobConf conf, JobID id) { ++numJobsSubmitted; } @Override public synchronized void completeJob(JobConf conf, JobID id) { collectJobCounters(id); ++numJobsCompleted; } @Override public synchronized void addWaitingMaps(JobID id, int task) { } @Override public synchronized void decWaitingMaps(JobID id, int task) { } @Override public synchronized void addWaitingReduces(JobID id, int task) { } @Override public synchronized void decWaitingReduces(JobID id, int task){ } @Override public synchronized void setMapSlots(int slots) { numMapSlots = slots; } @Override public synchronized void setReduceSlots(int slots) { numReduceSlots = slots; } @Override public synchronized void addBlackListedMapSlots(int slots){ numBlackListedMapSlots += slots; } @Override public synchronized void decBlackListedMapSlots(int slots){ numBlackListedMapSlots -= slots; } @Override public synchronized void addBlackListedReduceSlots(int slots){ numBlackListedReduceSlots += slots; } @Override public synchronized void decBlackListedReduceSlots(int slots){ numBlackListedReduceSlots -= slots; } @Override public synchronized void addReservedMapSlots(int slots) { numReservedMapSlots += slots; } @Override public synchronized void decReservedMapSlots(int slots) { numReservedMapSlots -= slots; } @Override public synchronized void addReservedReduceSlots(int slots) { numReservedReduceSlots += slots; } @Override public synchronized void decReservedReduceSlots(int slots) { numReservedReduceSlots -= slots; } @Override public synchronized void addOccupiedMapSlots(int slots) { numOccupiedMapSlots += slots; } @Override public synchronized void decOccupiedMapSlots(int slots) { numOccupiedMapSlots -= slots; } @Override public synchronized void addOccupiedReduceSlots(int slots) { numOccupiedReduceSlots += slots; } @Override public synchronized void decOccupiedReduceSlots(int slots) { numOccupiedReduceSlots -= slots; } @Override public synchronized void failedJob(JobConf conf, JobID id) { numJobsFailed++; } @Override public synchronized void killedJob(JobConf conf, JobID id) { numJobsKilled++; } @Override public synchronized void addPrepJob(JobConf conf, JobID id) { numJobsPreparing++; } @Override public synchronized void decPrepJob(JobConf conf, JobID id) { numJobsPreparing--; } @Override public synchronized void addRunningJob(JobConf conf, JobID id) { numJobsRunning++; } @Override public synchronized void decRunningJob(JobConf conf, JobID id) { numJobsRunning--; } @Override public synchronized void addRunningMaps(int task) { } @Override public synchronized void decRunningMaps(int task) { } @Override public synchronized void addRunningReduces(int task) { } @Override public synchronized void decRunningReduces(int task) { } @Override public synchronized void killedMap(TaskAttemptID taskAttemptID) { } @Override public synchronized void killedReduce(TaskAttemptID taskAttemptID) { } @Override public synchronized void addTrackers(int trackers) { numTrackers += trackers; } @Override public synchronized void decTrackers(int trackers) { numTrackers -= trackers; } @Override public synchronized void addBlackListedTrackers(int trackers) { numTrackersBlackListed += trackers; } @Override public synchronized void decBlackListedTrackers(int trackers) { numTrackersBlackListed -= trackers; } @Override public synchronized void setDecommissionedTrackers(int trackers) { numTrackersDecommissioned = trackers; } @Override public synchronized void addLaunchedJobs(long submitTime) { ++numJobsLaunched; totalSubmitTime += submitTime; } @Override public synchronized void addMapInputBytes(long size) { totalMapInputBytes += size; } @Override public synchronized void addLocalMapInputBytes(long size) { localMapInputBytes += size; addMapInputBytes(size); } @Override public synchronized void addRackMapInputBytes(long size) { rackMapInputBytes += size; addMapInputBytes(size); } @Override public void terminateJob(JobConf conf, JobID id) { collectJobCounters(id); } private synchronized void collectJobCounters(JobID id) { JobInProgress job = tracker.jobs.get(id); if (job == null) { return; } Counters jobCounter = job.getCounters(); for (JobInProgress.Counter key : JobInProgress.Counter.values()) { countersToMetrics.findCounter(key). increment(jobCounter.findCounter(key).getValue()); } for (Task.Counter key : Task.Counter.values()) { countersToMetrics.findCounter(key). increment(jobCounter.findCounter(key).getValue()); } for (Counter counter : jobCounter.getGroup(Task.FILESYSTEM_COUNTER_GROUP)) { countersToMetrics.incrCounter( Task.FILESYSTEM_COUNTER_GROUP, counter.getName(), counter.getValue()); } } /* * Set everything in the counters to zero */ private void clearCounters() { for (Group g : countersToMetrics) { for (Counter c : g) { c.setValue(0); } } } }