/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.corona.ComputeSpecs;
import org.apache.hadoop.corona.InetAddress;
import org.apache.hadoop.corona.ResourceGrant;
import org.apache.hadoop.corona.ResourceRequest;
import org.apache.hadoop.corona.ResourceType;
import org.apache.hadoop.corona.Utilities;
public class ResourceTracker {
/** Logger for this class. */
public static final Log LOG = LogFactory.getLog(ResourceTracker.class);
/**
 * Highest integer that won't be used for request/grant ids: the id counter
 * is initialised with this value and ids are taken with incrementAndGet().
 */
public static final int START_REQUEST_ID = 0;
/**
 * Null grant id is an id of non-existing grant, for restoring JT state.
 * Declared final so the sentinel cannot be reassigned, and obtained through
 * Integer.valueOf instead of the deprecated Integer constructor.
 */
public static final Integer NONE_GRANT_ID =
    Integer.valueOf(START_REQUEST_ID - 1);
/**
 * Tells whether an id is the placeholder used for a non-existing grant.
 *
 * @param id the id to test; null yields false
 * @return true iff {@code id} equals {@link #NONE_GRANT_ID}
 */
public static boolean isNoneGrantId(Integer id) {
  final Integer sentinel = NONE_GRANT_ID;
  return sentinel.equals(id);
}
/**
 * Accessor for the placeholder id that stands for a non-existing grant.
 *
 * @return the {@link #NONE_GRANT_ID} sentinel
 */
public static Integer getNoneGrantId() {
  return ResourceTracker.NONE_GRANT_ID;
}
/**
 * Keeps the last request id handed out. New ids are taken with
 * incrementAndGet(), so the first id issued is START_REQUEST_ID + 1.
 */
AtomicInteger resourceRequestId = new AtomicInteger(START_REQUEST_ID);
/**
 * Builds the compute spec attached to map requests: one CPU, and the values
 * 10 / 1024 / 10 passed to setNetworkMBps / setMemoryMB / setDiskGB.
 *
 * @return a newly created spec
 */
static ComputeSpecs stdMapSpec() {
  final short cpuCount = 1;
  final ComputeSpecs mapSpec = new ComputeSpecs(cpuCount);
  mapSpec.setNetworkMBps((short) 10);
  mapSpec.setMemoryMB(1024);
  mapSpec.setDiskGB(10);
  return mapSpec;
}
/**
 * Builds the compute spec attached to reduce requests: one CPU, and the
 * values 50 / 1024 / 10 passed to setNetworkMBps / setMemoryMB / setDiskGB.
 *
 * @return a newly created spec
 */
static ComputeSpecs stdReduceSpec() {
  final short cpuCount = 1;
  final ComputeSpecs reduceSpec = new ComputeSpecs(cpuCount);
  reduceSpec.setNetworkMBps((short) 50);
  reduceSpec.setMemoryMB(1024);
  reduceSpec.setDiskGB(10);
  return reduceSpec;
}
// All requests registered with this tracker and not yet released, including
// those that have not been sent to the Cluster Manager yet. Keyed by
// request id.
HashMap<Integer, ResourceRequest> requestMap =
new HashMap<Integer, ResourceRequest>();
// All resource requests already filed with the Cluster Manager.
// New requests to send are computed as
// (ids in requestMap) - (ids in requestedResources);
// releases are computed as the reverse difference.
Map<Integer, ResourceRequest> requestedResources =
new HashMap<Integer, ResourceRequest>();
// Lookup table for all granted resources, keyed by grant id.
HashMap<Integer, ResourceGrant> grantedResources =
new HashMap<Integer, ResourceGrant>();
// Ids of granted resources that are not already in use.
Set<Integer> availableResources = new HashSet<Integer>();
// High-water marks of the number of REDUCE / MAP grants held at once,
// refreshed by updateGrantStatsUnprotected().
int maxReduceGrants = 0;
int maxMapGrants = 0;
// Map from tracker name to the address information of that tracker.
Map<String, InetAddress> trackerAddress = new HashMap<String, InetAddress>();
// Monitor the public methods synchronize on before touching the collections
// above; it is supplied by whoever constructs the tracker.
private final Object lockObject;
/**
 * Creates a tracker that uses the given object as its monitor.
 *
 * @param lockObject object this tracker synchronizes, waits and notifies on
 */
public ResourceTracker(Object lockObject) {
this.lockObject = lockObject;
}
/**
 * Counts the currently granted MAP and REDUCE resources while holding the
 * lock. JOBTRACKER grants are not counted.
 *
 * @return snapshot holding the number of map grants and reduce grants
 * @throws RuntimeException if a grant has a type other than MAP, REDUCE or
 *         JOBTRACKER
 */
public ResourceUsage getResourceUsage() {
  int mapGrantCount = 0;
  int reduceGrantCount = 0;
  synchronized (lockObject) {
    for (ResourceGrant grant : grantedResources.values()) {
      switch (grant.getType()) {
        case MAP:
          mapGrantCount++;
          break;
        case REDUCE:
          reduceGrantCount++;
          break;
        case JOBTRACKER:
          // Ignore for now
          break;
        default:
          throw new RuntimeException("Illegal type " + grant.getType());
      }
    }
  }
  return new ResourceUsage(mapGrantCount, reduceGrantCount);
}
/**
 * Collects the requests that are registered in requestMap but have not been
 * filed yet, and marks each of them as filed by adding it to
 * requestedResources.
 *
 * @return the requests that still have to be sent to the Cluster Manager;
 *         empty if there are none
 */
public List<ResourceRequest> getWantedResources() {
  final List<ResourceRequest> toSend = new ArrayList<ResourceRequest>();
  synchronized (lockObject) {
    final List<Integer> unsentIds =
        setDifference(requestMap.keySet(), requestedResources.keySet());
    for (Integer id : unsentIds) {
      final ResourceRequest pending = requestMap.get(id);
      LOG.info("Filing request for resource " + id);
      requestedResources.put(id, pending);
      toSend.add(pending);
    }
  }
  return toSend;
}
/**
 * Collects the filed requests that are no longer present in requestMap and
 * removes them from requestedResources right away, on the assumption that
 * the caller will manage to release them.
 *
 * @return the requests that have to be released; empty if there are none
 */
public List<ResourceRequest> getResourcesToRelease() {
  final List<ResourceRequest> toRelease = new ArrayList<ResourceRequest>();
  synchronized (lockObject) {
    final List<Integer> staleIds =
        setDifference(requestedResources.keySet(), requestMap.keySet());
    for (Integer id : staleIds) {
      final ResourceRequest dropped = requestedResources.remove(id);
      if (dropped == null) {
        continue;
      }
      toRelease.add(dropped);
      LOG.info("Filing release for requestId: " + dropped.getId());
    }
  }
  return toRelease;
}
/**
 * Drops the filed request with the given id and registers a copy of it under
 * a new id, with the given hosts added to the copy's excluded hosts.
 *
 * @param grantIdToRelease id looked up in requestedResources
 * @param excludedHosts hosts to exclude in the new request, may be null
 * @return the newly recorded request, or null if no filed request has the
 *         given id
 */
public ResourceRequest releaseAndRequestResource(Integer grantIdToRelease, Set<String> excludedHosts) {
  synchronized (lockObject) {
    final ResourceRequest previous = requestedResources.get(grantIdToRelease);
    if (previous == null) {
      LOG.info ("releaseAndRequest for grant: " + grantIdToRelease + " not found");
      return null;
    }
    removeRequestUnprotected(previous);
    final ResourceRequest replacement = copyRequest(previous, excludedHosts);
    recordRequestUnprotected(replacement);
    final String exclusionNote =
        (excludedHosts != null) ? "excluding resource" : "";
    LOG.info ("releaseAndRequest for grant: " + grantIdToRelease + " completed " +
        exclusionNote +
        ". Generated new request #" + replacement.getId());
    return replacement;
  }
}
/**
 * Release the resource that was requested.
 *
 * The request is looked up among the filed requests first and, failing that,
 * among the recorded ones, so a request that was recorded but not filed yet
 * can be dropped as well. An unknown id is logged and ignored instead of
 * failing with a NullPointerException.
 *
 * @param resourceId id of the request to release
 */
public void releaseResource(int resourceId) {
  synchronized (lockObject) {
    ResourceRequest req = requestedResources.get(resourceId);
    if (req == null) {
      // Not filed (yet): it may still be waiting in requestMap.
      req = requestMap.get(resourceId);
    }
    if (req == null) {
      LOG.warn("releaseResource: no request found for id " + resourceId);
      return;
    }
    removeRequestUnprotected(req);
  }
}
/**
 * Marks a grant as available again, provided it is still in the granted
 * table, and notifies one thread waiting on the lock. The call is logged
 * whether or not the grant was found.
 *
 * @param grantIdToReuse id of the grant to make available
 */
public void reuseGrant(Integer grantIdToReuse) {
  synchronized (lockObject) {
    final boolean stillGranted = grantedResources.containsKey(grantIdToReuse);
    if (stillGranted) {
      availableResources.add(grantIdToReuse);
      lockObject.notify();
    }
  }
  LOG.info ("reuseGrant for grant: " + grantIdToReuse);
}
/**
 * Registers grants obtained from the Cluster Manager. A grant whose id is
 * missing from requestedResources or from requestMap is skipped; every other
 * grant updates the tracker address table and becomes granted and available.
 * Afterwards the grant statistics are refreshed and one thread waiting on
 * the lock is notified.
 *
 * @param grants the grants to register
 */
public void addNewGrants(List<ResourceGrant> grants) {
  final int grantedCount;
  final int availableCount;
  synchronized (lockObject) {
    for (ResourceGrant grant : grants) {
      final Integer id = grant.getId();
      final boolean stillWanted =
          requestedResources.containsKey(id) && requestMap.containsKey(id);
      if (!stillWanted) {
        LOG.info("Request for grant " + grant.getId() + " no longer exists");
        continue;
      }
      assert !grantedResources.containsKey(grant.getId()) :
          "Grant " + grant.getId() + " has already been processed.";
      updateTrackerAddressUnprotected(grant);
      addGrantedResourceUnprotected(grant);
    }
    updateGrantStatsUnprotected();
    grantedCount = grantedResources.size();
    availableCount = availableResources.size();
    lockObject.notify();
  }
  LOG.info("Number of available grants: " + availableCount +
      " out of " + grantedCount);
}
/**
 * Callback invoked by processAvailableGrants for each available grant.
 */
public interface ResourceProcessor {
  /**
   * @param resource the grant being offered
   * @return true if the grant was used; false to have it put back into the
   *         available set
   */
  boolean processAvailableResource(ResourceGrant resource);
}
/**
 * Same as the three-argument overload, called with a timeout of
 * Long.MAX_VALUE.
 *
 * @param processor callback offered each available grant
 * @param maxBatchSize maximum number of grants offered in this call
 * @throws InterruptedException if interrupted while waiting on the lock
 */
public void processAvailableGrants(
    ResourceProcessor processor, int maxBatchSize) throws InterruptedException {
  final long noTimeout = Long.MAX_VALUE;
  processAvailableGrants(processor, maxBatchSize, noTimeout);
}
/**
 * Offers currently available grants to the processor, at most maxBatchSize
 * of them per call. The processor is invoked while the lock is held.
 *
 * If no grant is available, the method waits on the lock once for up to
 * {@code timeout} milliseconds and returns with a warning if the available
 * set is still empty after waking up, whatever caused the wake-up.
 *
 * Every offered grant is first taken out of the available set; the ones the
 * processor declines (returns false for) are put back once the batch is done.
 * Before returning, the method waits on the lock again (500 ms if the batch
 * was not filled, 1 ms otherwise), which releases the monitor for that time.
 *
 * @param processor callback offered each available grant
 * @param maxBatchSize maximum number of grants offered in this call
 * @param timeout maximum time in milliseconds to wait for a grant
 * @throws InterruptedException if interrupted while waiting on the lock
 */
public void processAvailableGrants(
    ResourceProcessor processor,
    int maxBatchSize,
    long timeout) throws InterruptedException {
  synchronized(lockObject) {
    while (availableResources.isEmpty()) {
      lockObject.wait(timeout);
      // Any wake-up that leaves the set empty ends the call, so this loop
      // body runs at most once.
      if (availableResources.isEmpty()) {
        LOG.warn("No available resources after timeout of " + timeout);
        return;
      }
    }
    List<Integer> stillAvailable = new ArrayList<Integer>();
    Iterator<Integer> grantIter = availableResources.iterator();
    int processed = 0;
    while (grantIter.hasNext() && processed < maxBatchSize) {
      processed++;
      Integer grantId = grantIter.next();
      grantIter.remove();
      ResourceGrant grant = grantedResources.get(grantId);
      if (processor.processAvailableResource(grant)) {
        if (LOG.isDebugEnabled()) {
          // Logged at debug level to match the guard and the sibling branch.
          LOG.debug("processed available resource with requestId: " +
              grantId);
        }
      } else {
        if (LOG.isDebugEnabled()) {
          LOG.debug("available resource with requestId: " + grantId +
              " is not processed and is still available");
        }
        stillAvailable.add(grantId);
      }
    }
    // Put the grants the processor declined back into the available set.
    availableResources.addAll(stillAvailable);
    if (processed < maxBatchSize) {
      // We did not have enough to process, wait for some time before
      // next iteration. If more resources become available, the object
      // will be notified.
      lockObject.wait(500);
    } else {
      // We processed a batch of data, wait for a short time to yield
      // the lock.
      lockObject.wait(1);
    }
  }
}
/**
 * Looks up a grant in the granted table while holding the lock.
 *
 * @param grantId id of the grant
 * @return the grant, or null if the table has no entry for the id
 */
public ResourceGrant getGrant(Integer grantId) {
  synchronized (lockObject) {
    final ResourceGrant found = grantedResources.get(grantId);
    return found;
  }
}
/**
 * Returns the high-water mark of grants recorded by
 * updateGrantStatsUnprotected().
 *
 * @param map true for the map counter, false for the reduce counter
 * @return maxMapGrants or maxReduceGrants
 */
public int maxGrantedResources(boolean map) {
  synchronized (lockObject) {
    return map ? maxMapGrants : maxReduceGrants;
  }
}
/**
 * Copies the tracker name to address table while holding the lock.
 *
 * @return a new map with the current entries; changing it does not affect
 *         this tracker's table
 */
public Map<String, InetAddress> allTrackers() {
  synchronized (lockObject) {
    final Map<String, InetAddress> snapshot =
        new HashMap<String, InetAddress>(trackerAddress);
    return snapshot;
  }
}
/**
 * Looks up the address recorded for a tracker while holding the lock.
 *
 * @param trackerName name of the tracker
 * @return the recorded address, or null if none is recorded for that name
 */
public InetAddress getTrackerAddr(String trackerName) {
  synchronized (lockObject) {
    final InetAddress recorded = trackerAddress.get(trackerName);
    return recorded;
  }
}
/**
 * Records (or replaces) the address of a tracker in the mapping between
 * tracker names and addresses.
 *
 * @param trackerName name of the tracker
 * @param addr address of the tracker
 */
public void updateTrackerAddr(String trackerName, InetAddress addr) {
  final Object monitor = lockObject;
  synchronized (monitor) {
    trackerAddress.put(trackerName, addr);
  }
}
/**
 * Returns the port of the address recorded for a tracker.
 *
 * @param trackerName name of the tracker
 * @return the port of the recorded address
 * @throws NullPointerException if no address is recorded for the name
 */
public int getTrackerPort(String trackerName) {
  synchronized (lockObject) {
    final InetAddress recorded = trackerAddress.get(trackerName);
    return recorded.getPort();
  }
}
/**
 * Creates a request with a new id that has the same type and specs as the
 * given request. The excluded hosts of the copy are the union of the
 * original's excluded hosts and {@code excludedByTip}; the copy's hosts are
 * the original's hosts minus that union and are left unset if none remain.
 *
 * @param requestToCopy request to duplicate
 * @param excludedByTip additional hosts to exclude, may be null
 * @return the new request; it is not recorded in any table here
 */
private ResourceRequest copyRequest(ResourceRequest requestToCopy,
    Set<String> excludedByTip) {
  // Allocate the id the same way the new*Request factories do.
  int requestId = nextRequestId();
  ResourceRequest req = new ResourceRequest(requestId, requestToCopy.getType());
  req.setSpecs(requestToCopy.getSpecs());

  // The set is always created, so no null check is needed further down.
  Set<String> excluded = new HashSet<String>();
  if (requestToCopy.getExcludeHosts() != null) {
    excluded.addAll(requestToCopy.getExcludeHosts());
  }
  if (excludedByTip != null) {
    excluded.addAll(excludedByTip);
  }
  req.setExcludeHosts(new ArrayList<String>(excluded));

  if (requestToCopy.getHosts() != null) {
    List<String> hosts = new ArrayList<String>();
    for (String host : requestToCopy.getHosts()) {
      if (!excluded.contains(host)) {
        hosts.add(host);
      }
    }
    if (!hosts.isEmpty()) {
      req.setHosts(hosts);
    }
  }
  return req;
}
/**
 * Atomically advances the request id counter.
 *
 * @return the counter value after the increment
 */
public int nextRequestId() {
  final int freshId = resourceRequestId.incrementAndGet();
  return freshId;
}
/**
 * Creates a MAP request with a new id and the standard map spec. The split
 * locations become the request's hosts; when there are none (null or empty
 * array) the hosts are left unset.
 *
 * @param splitLocations hosts holding the split, may be null or empty
 * @return the new request; it is not recorded in any table here
 */
public ResourceRequest newMapRequest(String[] splitLocations) {
  int requestId = nextRequestId();
  ResourceRequest req = new ResourceRequest(requestId,
      ResourceType.MAP);
  req.setSpecs(stdMapSpec());
  if (splitLocations != null && splitLocations.length > 0) {
    // Copy into a growable list, as the loop-built list used to be.
    req.setHosts(new ArrayList<String>(Arrays.asList(splitLocations)));
  }
  return req;
}
/**
 * Creates a REDUCE request with a new id and the standard reduce spec.
 *
 * @return the new request; it is not recorded in any table here
 */
public ResourceRequest newReduceRequest() {
  final ResourceRequest reduceRequest =
      new ResourceRequest(nextRequestId(), ResourceType.REDUCE);
  reduceRequest.setSpecs(stdReduceSpec());
  return reduceRequest;
}
/**
 * Creates a JOBTRACKER request with a new id. The spec attached is the one
 * produced by stdReduceSpec().
 *
 * @return the new request; it is not recorded in any table here
 */
public ResourceRequest newJobTrackerRequest() {
  final ResourceRequest jtRequest =
      new ResourceRequest(nextRequestId(), ResourceType.JOBTRACKER);
  jtRequest.setSpecs(stdReduceSpec());
  return jtRequest;
}
/**
 * Registers a request in requestMap while holding the lock.
 *
 * @param req request to register, keyed by its id
 */
public void recordRequest(ResourceRequest req) {
  final Object monitor = lockObject;
  synchronized (monitor) {
    recordRequestUnprotected(req);
  }
}
/**
 * Registers a request in requestMap under its id. Does no locking itself.
 *
 * @param req request to register
 */
private void recordRequestUnprotected(ResourceRequest req) {
  final Integer id = req.getId();
  requestMap.put(id, req);
}
/**
 * Removes an id from the available set and from the granted table, then
 * checks that the tables agreed with each other. Does no locking itself.
 * Both removals have already happened by the time a check fails.
 *
 * @param id id of the grant to remove
 * @throws RuntimeException if the id was available but not granted, or was
 *         granted but has no entry in requestMap
 */
private void removeGrantedResourceUnprotected(Integer id) {
  boolean removedFromAvailable = false;
  if (availableResources.contains(id)) {
    // Logged before the removal, so the printed set still contains the id.
    LOG.info("Removing " + id + " from available " + availableResources);
    removedFromAvailable = availableResources.remove(id);
  }
  final ResourceGrant removedGrant = grantedResources.remove(id);
  final boolean wasGranted = removedGrant != null;
  if (removedFromAvailable && !wasGranted) {
    throw new RuntimeException(
        "Resource " + id + " was available but not granted");
  }
  if (wasGranted && !requestMap.containsKey(id)) {
    throw new RuntimeException(
        "Resource " + id + " was granted but not requested");
  }
}
/**
 * Stores a grant in the granted table under its id and marks that id as
 * available. Does no locking itself.
 *
 * @param grant grant to store
 */
private void addGrantedResourceUnprotected(ResourceGrant grant) {
  final Integer grantId = grant.getId();
  grantedResources.put(grantId, grant);
  availableResources.add(grantId);
}
/**
 * Recounts the MAP and REDUCE grants currently in the granted table and
 * raises maxMapGrants / maxReduceGrants if the new counts exceed them.
 * JOBTRACKER grants are not counted. Does no locking itself.
 *
 * @throws RuntimeException if a grant has a type other than MAP, REDUCE or
 *         JOBTRACKER
 */
private void updateGrantStatsUnprotected() {
  int mapCount = 0;
  int reduceCount = 0;
  for (ResourceGrant current : grantedResources.values()) {
    switch (current.getType()) {
      case MAP:
        mapCount++;
        break;
      case REDUCE:
        reduceCount++;
        break;
      case JOBTRACKER:
        // Not tracked for now
        break;
      default:
        throw new RuntimeException("Unknown resource type " + current.getType());
    }
  }
  if (mapCount > maxMapGrants) {
    maxMapGrants = mapCount;
  }
  if (reduceCount > maxReduceGrants) {
    maxReduceGrants = reduceCount;
  }
}
/**
 * Records the address derived from the grant's appInfo under the grant's
 * node name in the tracker address table. Does no locking itself.
 *
 * @param grant grant carrying the node name and appInfo
 */
private void updateTrackerAddressUnprotected(ResourceGrant grant) {
  trackerAddress.put(
      grant.getNodeName(),
      Utilities.appInfoToAddress(grant.appInfo));
}
/**
 * Removes the grant stored under the request's id (if any) and then the
 * request itself from requestMap. It *does not* remove from
 * requestedResources; this lets getResourcesToRelease() figure out what
 * needs to be released. Does no locking itself.
 *
 * @param req request to remove
 */
private void removeRequestUnprotected(ResourceRequest req) {
  final Integer id = req.getId();
  // The grant must go first: its removal checks requestMap for this id.
  removeGrantedResourceUnprotected(id);
  requestMap.remove(id);
}
/**
 * Computes the elements of s1 that s2 does not contain, in the iteration
 * order of s1.
 *
 * @param s1 set to take elements from
 * @param s2 set of elements to leave out
 * @return a new list; neither input set is modified
 */
private static <T> List<T> setDifference(Set<T> s1, Set<T> s2) {
  final List<T> remaining = new ArrayList<T>(s1);
  remaining.removeAll(s2);
  return remaining;
}
/**
 * Tells whether at least one grant is currently in the available set.
 *
 * @return true if the available set is not empty
 */
public boolean hasAvailableResources() {
  synchronized (lockObject) {
    final boolean noneAvailable = availableResources.isEmpty();
    return !noneAvailable;
  }
}
/**
 * Lists the MAP and REDUCE resource types, in that order.
 *
 * @return a fixed-size list with ResourceType.MAP and ResourceType.REDUCE
 */
public static List<ResourceType> resourceTypes() {
  final ResourceType[] types = {ResourceType.MAP, ResourceType.REDUCE};
  return Arrays.asList(types);
}
}