/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.net.Node; import org.apache.hadoop.net.TopologyCache; import org.apache.hadoop.mapred.JobInProgress.Counter; import java.util.ArrayList; /** * Record locality information. Can perform the locality computation in a * separate thread. */ public class LocalityStats implements Runnable { /** Logger. */ private static final Log LOG = LogFactory.getLog(LocalityStats.class); /** Topology cache. */ private final TopologyCache topologyCache; /** Max locality level. */ private final int maxLevel; /** Job Counters. */ private final Counters jobCounters; /** Job Statistics. */ private final JobStats jobStats; /** List of records to be used for asynchronous operation. */ private final ArrayList<Record> localityRecords = new ArrayList<Record>(); /** In async mode, used to check if we are running. */ private volatile boolean running = true; /** * Constructor. * @param jobConf Job Configuration. * @param maxLevel The maximum locality level. * @param counters The job counters to update. * @param jobStats The job statistics to update. */ public LocalityStats( JobConf jobConf, int maxLevel, Counters counters, JobStats jobStats, TopologyCache topologyCache) { this.topologyCache = topologyCache; this.maxLevel = maxLevel; this.jobCounters = counters; this.jobStats = jobStats; } public String getNode(String host) { return topologyCache.getNode(host).toString(); } /** * Representation of information for asynchronous update. */ private static class Record { /** The task. */ private final TaskInProgress tip; /** The task tracker host. */ private final String host; /** The number of bytes processed. */ private final long inputBytes; /** * Constructor * @param tip The task. * @param host The task tracker host. * @param inputBytes The number of bytes processed. */ private Record(TaskInProgress tip, String host, long inputBytes) { this.tip = tip; this.host = host; this.inputBytes = inputBytes; } } /** * In async mode, stop the thread. */ public void stop() { running = false; } /** * Asynchronous update of locality. * @param tip The task. * @param host The task tracker host. * @param inputBytes The number of bytes processed. */ public void record( TaskInProgress tip, String host, long inputBytes) { synchronized (localityRecords) { localityRecords.add(new Record(tip, host, inputBytes)); localityRecords.notify(); } } @Override public void run() { LOG.info("Starting locality computation thread"); while (running) { Record record = null; synchronized (localityRecords) { while (localityRecords.isEmpty()) { try { localityRecords.wait(); } catch (InterruptedException e) { if (!running) { return; } } } // Remove last element in the array. record = localityRecords.remove(localityRecords.size() - 1); } computeStatistics(record); } LOG.info("Exiting locality computation thread"); } /** * Peform the computation statistics based on a locality record. * @param record The locality information. */ private void computeStatistics(Record record) { computeStatistics(record.tip, record.host, record.inputBytes); } /** * Peform the computation statistics. * @param tip The task. * @param host The task tracker host. * @param inputBytes The number of bytes processed. */ private void computeStatistics( TaskInProgress tip, String host, long inputBytes) { int level = this.maxLevel; String[] splitLocations = tip.getSplitLocations(); if (splitLocations.length > 0) { Node tracker = topologyCache.getNode(host); // find the right level across split locations for (String local : splitLocations) { Node datanode = topologyCache.getNode(local); int newLevel = this.maxLevel; if (tracker != null && datanode != null) { newLevel = getMatchingLevelForNodes(tracker, datanode, maxLevel); } if (newLevel < level) { level = newLevel; if (level == 0) { break; } } } } boolean updateTaskCountOnly = inputBytes < 0; switch (level) { case 0: if (updateTaskCountOnly) { LOG.info("Chose data-local task " + tip.getTIPId()); jobCounters.incrCounter(Counter.DATA_LOCAL_MAPS, 1); jobStats.incNumDataLocalMaps(); } else { jobCounters.incrCounter(Counter.LOCAL_MAP_INPUT_BYTES, inputBytes); jobStats.incLocalMapInputBytes(inputBytes); } break; case 1: if (updateTaskCountOnly) { LOG.info("Chose rack-local task " + tip.getTIPId()); jobCounters.incrCounter(Counter.RACK_LOCAL_MAPS, 1); jobStats.incNumRackLocalMaps(); } else { jobCounters.incrCounter(Counter.RACK_MAP_INPUT_BYTES, inputBytes); jobStats.incRackMapInputBytes(inputBytes); } break; default: LOG.info("Chose non-local task " + tip.getTIPId() + " at level " + level); // check if there is any locality if (updateTaskCountOnly && level != this.maxLevel) { jobCounters.incrCounter(Counter.OTHER_LOCAL_MAPS, 1); } break; } } public static int getMatchingLevelForNodes(Node n1, Node n2, int maxLevel) { int count = 0; do { if (n1.equals(n2)) { return count; } ++count; n1 = n1.getParent(); n2 = n2.getParent(); } while (n1 != null && n2 != null); return maxLevel; } }