package org.apache.hadoop.mapred;
import java.io.File;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.FileReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
public class Cluster {
private String hostName;
private String httpAddress;
private String version;
private String hadoopHome;
private String slavesFile;
// Whether we can move a node anytime or should wait for all maps to finish
private boolean waitForMaps;
private List<TaskTrackerLoadInfo> trackers =
new ArrayList<TaskTrackerLoadInfo>();
private Set<String> exclusiveTrackers = new HashSet<String>();
private int maxLoad;
private int minLoad;
private int minNodes;
// Switches to false the first time we initialize full buffer
private boolean startup = true;
private int loadHistoryPos = 0;
private int[] loadHistory = new int[10];
TTLauncher ttLauncher;
public String getHostName() {
return hostName;
}
public String getHttpAddress() {
return httpAddress;
}
public int getMaxLoad() {
return maxLoad;
}
public int getMinLoad() {
return minLoad;
}
public List<TaskTrackerLoadInfo> getTrackers() {
return trackers;
}
public String getVersion() {
return version;
}
public void insertLoad(int load) {
DynamicCloudsDaemon.LOG.info("Got the new load value for " + hostName + " : " + load);
loadHistory[loadHistoryPos++] = load;
// The first time we pass the loadHistory.size - startup is over
if (startup && loadHistory.length == loadHistoryPos) {
DynamicCloudsDaemon.LOG.info("Exiting startup mode");
}
startup = startup && loadHistory.length != loadHistoryPos;
loadHistoryPos = loadHistoryPos % loadHistory.length;
}
public int getAverageLoad() {
int total = 0;
for (int i = 0; i < loadHistory.length; i++) {
total += loadHistory[i];
}
return total / loadHistory.length;
}
public Cluster(String httpAddress, int minLoad, int maxLoad,
boolean waitForMaps, int minNodes, TTLauncher ttLauncher,
String exclusiveTrackers)
throws MalformedURLException, IOException {
super();
if (minLoad < 0 || minLoad > 100 || maxLoad < 0 || maxLoad > 100) {
DynamicCloudsDaemon.LOG.error("min load and max load of the cluster " + "have to be integer values from 0 to 100");
throw new IOException("Failed to initialize cluster " + httpAddress);
}
if (minLoad >= maxLoad && maxLoad != 100) {
DynamicCloudsDaemon.LOG.error("min load of the cluster has to be smaller than the max " + "load of the cluster. The only exception is when they are " + "both 100% that means the cluster can always give " + "machines and only takes machines of noone else needs them");
throw new IOException("Failed to initialize cluster " + httpAddress);
}
URL uri = new URL(httpAddress);
hostName = uri.getHost();
if (exclusiveTrackers != null) {
File file = new File(exclusiveTrackers);
if (file.exists()) {
BufferedReader reader =
new BufferedReader(new FileReader(file));
String hostName = null;
while ((hostName = reader.readLine()) != null) {
this.exclusiveTrackers.add(hostName);
}
}
}
this.httpAddress = httpAddress;
this.waitForMaps = waitForMaps;
this.minLoad = minLoad;
this.maxLoad = maxLoad;
this.minNodes = minNodes;
this.ttLauncher = ttLauncher;
}
public void load() throws IOException {
DynamicCloudsDaemon.LOG.info("Loading initial cluster info");
Map<String, String> conf = ClusterStatusJSONParser.getClusterConf(httpAddress);
version = conf.get("version");
slavesFile = conf.get("slaves.file");
File file = new File(slavesFile);
// HADOOP_HOME/conf/slaves parent of parent is hadoop home
hadoopHome = file.getParentFile().getParent();
}
public synchronized void poll() {
try {
trackers = ClusterStatusJSONParser.getJobTrackerStatus(httpAddress);
insertLoad(getCurrentClusterLoad());
} catch (IOException ex) {
// The tracker went down - reset the cluster status
startup = true;
Arrays.fill(loadHistory, 0, loadHistory.length, 100);
loadHistoryPos = 0;
DynamicCloudsDaemon.LOG.error("Error updating cluster " +
this.getHostName());
}
}
public int getWaitingFor() {
return ttLauncher.getTasksInQueue(hostName);
}
public int countMachineShortage() {
// It is starting up - too early to tell
if (startup) {
return 0;
}
if (trackers.size() == 0) {
// Something bad happened, but the cluster probably needs nodes
return 1;
}
// The cluster is operating at normal load
if (getAverageLoad() < maxLoad) {
return 0;
}
float deficit = getAverageLoad() / (float) maxLoad - 1;
if (getAverageLoad() == 100 && maxLoad == 100) {
// Special case when the cluster is configured to operate
// fully loaded, but if there are free nodes it can use them
return 1;
}
int machines = (int) Math.ceil(deficit * trackers.size());
return machines;
}
public int countSpareMachines() {
// The cluster is only starting up
if (startup) {
return 0;
}
if (waitForMaps && countIdleNodes() == 0) {
return 0;
}
if (trackers.size() - minNodes <= 0) {
// The cluster is at it's smallest allowed size
return 0;
}
// The cluster is busy and cannot spare nodes
if (getAverageLoad() > minLoad) {
return 0;
}
float change = 1 - getAverageLoad() / (float) minLoad;
int spareMachines = (int) Math.ceil(change * trackers.size());
if (getAverageLoad() == 100 && minLoad == 100 && trackers.size() > 1) {
// This is a special case: the cluster is fully loaded,
// but it can still spare nodes since it is OK if it runs at 100% load
return 1;
}
// one machine has to stay.
// It should really be configurable for each cluster
return Math.min(spareMachines, trackers.size() - minNodes);
}
public int countIdleNodes() {
int idles = 0;
for (TaskTrackerLoadInfo ttli : trackers) {
if (ttli.getTotalMapTasks() == 0) {
idles++;
}
}
return idles;
}
public synchronized int getCurrentClusterLoad() {
int maps = 0;
int maxMaps = 0;
int reduces = 0;
int maxReduces = 0;
for (TaskTrackerLoadInfo tracker : trackers) {
maps += tracker.getRunningMapTasks();
maxMaps += tracker.getMaxMapTasks();
reduces += tracker.getRunningReduceTasks();
maxReduces += tracker.getMaxReduceTasks();
}
if (maxMaps == 0 || maxReduces == 0) {
// The cluster has no trackers. Load is 100%
// it should get some machines back asap
return 100;
}
int mapLoad = (maps * 100) / maxMaps;
int reduceLoad = (reduces * 100) / maxReduces;
return Math.max(mapLoad, reduceLoad);
}
public List<TaskTrackerLoadInfo> releaseTrackers(int numTrackers)
throws IOException {
List<TaskTrackerLoadInfo> releasedTrackers =
new ArrayList<TaskTrackerLoadInfo>();
TaskTrackerLoadInfoIterator iterator = new WastedTimeTTLIIterator();
iterator.setTrackers(trackers);
while (releasedTrackers.size() < numTrackers && iterator.hasNext()) {
TaskTrackerLoadInfo tracker = iterator.next();
String host = tracker.getTaskTrackerHost();
if (trackers.contains(host)) {
continue;
}
ShellCommandExecutor removeHostCommand = new ShellCommandExecutor(
new String[]{"ssh", hostName,
"cd " + hadoopHome + " && " + "bin/hadoop " +
TTMover.class.getCanonicalName() + " -remove " + host});
try {
removeHostCommand.execute();
releasedTrackers.add(tracker);
} catch (IOException ex) {
DynamicCloudsDaemon.LOG.error("Error removing tracker " +
tracker.getTaskTrackerName(), ex);
}
}
return releasedTrackers;
}
public List<TaskTrackerLoadInfo> addTrackers(List<TaskTrackerLoadInfo> trackers) {
List<TaskTrackerLoadInfo> trackersAdded = new ArrayList<TaskTrackerLoadInfo>();
for (TaskTrackerLoadInfo tracker : trackers) {
String host = tracker.getTaskTrackerHost();
ShellCommandExecutor addHostCommand =
new ShellCommandExecutor(
new String[]{"ssh", hostName,
"cd " + hadoopHome + " && " + "bin/hadoop " +
TTMover.class.getCanonicalName() +
" -add " + host});
try {
addHostCommand.execute();
trackersAdded.add(tracker);
} catch (IOException ex) {
DynamicCloudsDaemon.LOG.error("Error adding tracker " + tracker.getTaskTrackerName(), ex);
}
}
return trackersAdded;
}
public int launchTrackers(List<TaskTrackerLoadInfo> trackers) {
int trackersLaunched = 0;
for (TaskTrackerLoadInfo tracker : trackers) {
String host = tracker.getTaskTrackerHost();
String startCommand = DynamicCloudsDaemon.getStartCommand(version);
ShellCommandExecutor startTTCommand =
new ShellCommandExecutor(
new String[]{"ssh", host,
"cd " + hadoopHome + " && " + startCommand});
TTLaunchTask task = new TTLaunchTask(startTTCommand, this.hostName);
ttLauncher.addTTForLaunch(task);
}
return trackersLaunched;
}
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
buffer.append("Host :");
buffer.append(this.hostName);
buffer.append(" HADOOP_HOME:");
buffer.append(this.hadoopHome);
buffer.append("\n");
return buffer.toString();
}
public synchronized String getStatus() {
int totalMapSlots = 0;
int totalReduceSlots = 0;
int runningMaps = 0;
int runningReduces = 0;
int allMaps = 0;
for (TaskTrackerLoadInfo tracker : trackers) {
runningMaps += tracker.getRunningMapTasks();
runningReduces += tracker.getRunningReduceTasks();
allMaps += tracker.getTotalMapTasks();
totalMapSlots += tracker.getMaxMapTasks();
totalReduceSlots += tracker.getMaxReduceTasks();
}
StringBuilder buffer = new StringBuilder();
buffer.append("Map Load: ");
buffer.append(runningMaps).append("/").append(totalMapSlots).append("\n");
buffer.append("Reduce Load: ");
buffer.append(runningReduces).append("/").append(totalReduceSlots).append("\n");
buffer.append("Map tasks finished on the cluster: ").append(allMaps - runningMaps).append("\n");
return buffer.toString();
}
}