package org.apache.hadoop.mapred;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.FairScheduler.JobInfo;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.metrics.MetricsContext;
import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.MetricsUtil;
import org.apache.hadoop.metrics.Updater;
/**
 * Publishes fair scheduler metrics to the "mapred" metrics context.
 *
 * <p>Two kinds of records are maintained:
 * <ul>
 *   <li>a single "fairscheduler" record holding cluster-wide aggregates and
 *       the preemption counters;</li>
 *   <li>one "pool-&lt;name&gt;" record per pool, created the first time the
 *       pool is seen, holding that pool's min/max/running/runnable counts.</li>
 * </ul>
 *
 * <p>The preemption counters are guarded by this object's monitor. All other
 * mutable state is only touched from {@link #doUpdates(MetricsContext)}.
 */
public class FairSchedulerMetricsInst implements Updater {
  private final FairScheduler scheduler;
  private final MetricsRecord metricsRecord;
  private final JobTracker jobTracker;
  private final MetricsContext context = MetricsUtil.getContext("mapred");
  /** Per-pool records, keyed by the lower-cased pool name. */
  private final Map<String, MetricsRecord> poolToMetricsRecord;
  /** Minimum clock time between two recomputations of the pool metrics. */
  private final long updatePeriod;
  private long lastUpdateTime = 0;
  // Preemptions seen since the last flush; guarded by "this".
  private int numPreemptMaps = 0;
  private int numPreemptReduces = 0;

  /**
   * Creates the metrics instrumentation and registers it with the "mapred"
   * metrics context.
   *
   * @param scheduler the scheduler to report on; its
   *        {@code taskTrackerManager} is cast to {@link JobTracker}
   * @param conf configuration supplying
   *        {@code mapred.fairscheduler.metric.update.period}
   *        (default {@code 5 * 60 * 1000}, i.e. five minutes)
   */
  public FairSchedulerMetricsInst(FairScheduler scheduler, Configuration conf) {
    this.scheduler = scheduler;
    this.jobTracker = (JobTracker) scheduler.taskTrackerManager;
    // Create a record for map-reduce metrics
    metricsRecord = MetricsUtil.createRecord(context, "fairscheduler");
    poolToMetricsRecord = new HashMap<String, MetricsRecord>();
    updatePeriod = conf.getLong("mapred.fairscheduler.metric.update.period",
        5 * 60 * 1000); // default period is 5 MINs
    // Register last: once registered the context may call doUpdates(), so
    // every field must already be initialized before "this" is handed out.
    context.registerUpdater(this);
  }

  /**
   * Callback from the metrics context. Recomputes the pool metrics if more
   * than {@code updatePeriod} has elapsed since the last recomputation,
   * flushes the preemption counters, and pushes every record.
   *
   * @param context the calling metrics context (unused)
   */
  @Override
  public void doUpdates(MetricsContext context) {
    long now = JobTracker.getClock().getTime();
    if (now - lastUpdateTime > updatePeriod) {
      updateMetrics();
      lastUpdateTime = now;
    }
    updateCounters();
    metricsRecord.update();
    for (MetricsRecord mr : poolToMetricsRecord.values()) {
      mr.update();
    }
  }

  /**
   * Records that a map task was preempted.
   *
   * @param taskAttemptID the preempted attempt (currently unused)
   */
  public synchronized void preemptMap(TaskAttemptID taskAttemptID) {
    ++numPreemptMaps;
  }

  /**
   * Records that a reduce task was preempted.
   *
   * @param taskAttemptID the preempted attempt (currently unused)
   */
  public synchronized void preemptReduce(TaskAttemptID taskAttemptID) {
    ++numPreemptReduces;
  }

  /**
   * Adds the preemptions accumulated since the previous call to the
   * "fairscheduler" record and resets the local counters.
   */
  private void updateCounters() {
    synchronized (this) {
      metricsRecord.incrMetric("num_preempt_maps", numPreemptMaps);
      metricsRecord.incrMetric("num_preempt_reduces", numPreemptReduces);
      numPreemptMaps = 0;
      numPreemptReduces = 0;
    }
  }

  /**
   * Snapshots every pool while holding the job tracker and scheduler locks,
   * then publishes the per-pool and aggregate metrics after releasing them.
   */
  private void updateMetrics() {
    int numActivePools = 0;
    int numStarvedPools = 0;
    int numStarvedJobs = 0;
    int totalMinReduces = 0;
    int totalMaxReduces = 0;
    int totalMinMaps = 0;
    int totalMaxMaps = 0;
    int totalRunningJobs = 0;
    Collection<PoolInfo> infos = new LinkedList<PoolInfo>();
    // Lock order is jobTracker first, then scheduler. Only the snapshot is
    // taken under the locks; metric submission happens afterwards.
    synchronized (jobTracker) {
      synchronized (scheduler) {
        PoolManager poolManager = scheduler.getPoolManager();
        for (Pool pool : poolManager.getPools()) {
          PoolInfo info = new PoolInfo(pool);
          infos.add(info);
          numStarvedJobs += info.numStarvedJobs;
          totalRunningJobs += info.runningJobs;
          totalMinMaps += info.minMaps;
          totalMinReduces += info.minReduces;
          // Integer.MAX_VALUE is treated as "no limit" and left out of the
          // totals so the sums stay meaningful and cannot overflow from it.
          if (info.maxMaps != Integer.MAX_VALUE) {
            totalMaxMaps += info.maxMaps;
          }
          if (info.maxReduces != Integer.MAX_VALUE) {
            totalMaxReduces += info.maxReduces;
          }
          if (info.isActive()) {
            ++numActivePools;
          }
          if (info.isStarved()) {
            ++numStarvedPools;
          }
        }
      }
    }
    for (PoolInfo info : infos) {
      submitPoolMetrics(info);
    }
    metricsRecord.setMetric("num_active_pools", numActivePools);
    metricsRecord.setMetric("num_starved_pools", numStarvedPools);
    metricsRecord.setMetric("num_starved_jobs", numStarvedJobs);
    metricsRecord.setMetric("num_running_jobs", totalRunningJobs);
    metricsRecord.setMetric("total_min_maps", totalMinMaps);
    metricsRecord.setMetric("total_max_maps", totalMaxMaps);
    metricsRecord.setMetric("total_min_reduces", totalMinReduces);
    metricsRecord.setMetric("total_max_reduces", totalMaxReduces);
  }

  /**
   * Writes one pool's snapshot into its "pool-&lt;name&gt;" record, creating
   * and caching the record on first use.
   *
   * @param info the pool snapshot to publish
   */
  private void submitPoolMetrics(PoolInfo info) {
    // Use a fixed locale so the record name does not depend on the JVM's
    // default locale (e.g. Turkish dotless-i lower-casing).
    String pool = info.poolName.toLowerCase(Locale.ENGLISH);
    MetricsRecord record = poolToMetricsRecord.get(pool);
    if (record == null) {
      record = MetricsUtil.createRecord(context, "pool-" + pool);
      FairScheduler.LOG.info("Create metrics record for pool:" + pool);
      poolToMetricsRecord.put(pool, record);
    }
    record.setMetric("min_map", info.minMaps);
    record.setMetric("min_reduce", info.minReduces);
    record.setMetric("max_map", info.maxMaps);
    record.setMetric("max_reduce", info.maxReduces);
    record.setMetric("running_map", info.runningMaps);
    record.setMetric("running_reduce", info.runningReduces);
    record.setMetric("runnable_map", info.runnableMaps);
    record.setMetric("runnable_reduce", info.runnableReduces);
  }

  /**
   * Snapshot of a single pool's allocation limits and task counts, taken at
   * construction time. Reads the enclosing instance's {@code scheduler}.
   */
  private class PoolInfo {
    final String poolName;
    final int runningJobs;
    final int minMaps;
    final int minReduces;
    final int maxMaps;
    final int maxReduces;
    int runningMaps = 0;
    int runningReduces = 0;
    // "Runnable" is accumulated as needed + running per job.
    int runnableMaps = 0;
    int runnableReduces = 0;
    int numStarvedJobs = 0;

    /**
     * Captures the pool's name, job count, min allocations and max slots
     * from the pool manager, then tallies the tasks of its jobs.
     *
     * @param pool the pool to snapshot
     */
    PoolInfo(Pool pool) {
      PoolManager poolManager = scheduler.getPoolManager();
      poolName = pool.getName();
      runningJobs = pool.getJobs().size();
      minMaps = poolManager.getAllocation(poolName, TaskType.MAP);
      minReduces = poolManager.getAllocation(poolName, TaskType.REDUCE);
      maxMaps = poolManager.getMaxSlots(poolName, TaskType.MAP);
      maxReduces = poolManager.getMaxSlots(poolName, TaskType.REDUCE);
      countTasks(pool);
    }

    /**
     * Sums running and runnable tasks over the pool's jobs and counts the
     * starved ones. Jobs with no entry in {@code scheduler.infos} are skipped.
     */
    private void countTasks(Pool pool) {
      for (JobInProgress job : pool.getJobs()) {
        JobInfo info = scheduler.infos.get(job);
        if (info != null) {
          runningMaps += info.runningMaps;
          runningReduces += info.runningReduces;
          runnableMaps += info.neededMaps + info.runningMaps;
          runnableReduces += info.neededReduces + info.runningReduces;
          if (isStarvedJob(info)) {
            ++numStarvedJobs;
          }
        }
      }
    }

    /**
     * A job is starved when, for maps or for reduces, its needed + running
     * tasks exceed its fair share while its running tasks are below it.
     */
    private boolean isStarvedJob(JobInfo info) {
      return ((info.neededMaps + info.runningMaps > info.mapFairShare &&
               info.runningMaps < info.mapFairShare) ||
              (info.neededReduces + info.runningReduces > info.reduceFairShare &&
               info.runningReduces < info.reduceFairShare));
    }

    /**
     * A pool is inactive only when it has no jobs, no min allocation, no
     * max limit (both equal {@code Integer.MAX_VALUE}) and no running tasks;
     * anything else counts as active.
     */
    boolean isActive() {
      return !(runningJobs == 0 && minMaps == 0 && minReduces == 0 &&
               maxMaps == Integer.MAX_VALUE && maxReduces == Integer.MAX_VALUE &&
               runningMaps == 0 && runningReduces == 0);
    }

    /**
     * A pool is starved when, for maps or for reduces, its runnable tasks
     * exceed its min allocation while its running tasks are below it.
     */
    boolean isStarved() {
      return ((runnableMaps > minMaps && runningMaps < minMaps) ||
              (runnableReduces > minReduces && runningReduces < minReduces));
    }
  }
}