package org.apache.hadoop.corona;
import java.io.IOException;
import java.util.EnumMap;
import java.util.List;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.TopologyCache;
import org.apache.hadoop.mapred.ResourceTracker;
import org.apache.thrift.TException;
/**
 * Tests the Corona ClusterManager's preemption behavior: that pools starving
 * for their configured minimum or fair share preempt resources from other
 * pools, and that pools with zero pending requests do not trigger preemption.
 */
public class TestPreemption extends TestCase {
  /** Logger shared by all tests in this class. */
  final static Log LOG = LogFactory.getLog(TestPreemption.class);
  /** Host that every fake session reports as its address. */
  public final static String sessionHost = "localhost";

  /**
   * Returns a distinct port for the i-th fake session so sessions do not
   * collide on the same (host, port) identity.
   *
   * @param i session index
   * @return port number for that session (7000 + i)
   */
  public static int getSessionPort(int i) {
    return (7000 + i);
  }

  private Configuration conf;
  private ClusterManagerTestable cm;
  private ClusterNodeInfo nodes[];
  private int numNodes;
  private SessionInfo sessionInfos[];
  private int numSessions;
  private String handles[];
  private Session sessions[];
  protected TopologyCache topologyCache;

  /**
   * Builds a testable ClusterManager with 10 fake nodes offering MAP and
   * REDUCE resources, plus 3 default sessions (tests may call
   * {@link #setupSessions(int)} again for a different count).
   */
  @Override
  protected void setUp() throws IOException {
    conf = new Configuration();
    // Allow sessions to use pools that are not explicitly configured.
    conf.setBoolean(CoronaConf.CONFIGURED_POOLS_ONLY, false);
    conf.setClass("topology.node.switch.mapping.impl",
        org.apache.hadoop.net.IPv4AddressTruncationMapping.class,
        org.apache.hadoop.net.DNSToSwitchMapping.class);
    conf.set(CoronaConf.CPU_TO_RESOURCE_PARTITIONING,
        TstUtils.std_cpu_to_resource_partitioning);
    topologyCache = new TopologyCache(conf);
    cm = new ClusterManagerTestable(conf);

    numNodes = 10;
    nodes = new ClusterNodeInfo[numNodes];
    // Every node advertises (empty) resource info for both MAP and REDUCE so
    // the ClusterManager considers it able to run either resource type.
    Map<ResourceType, String> resourceInfos =
        new EnumMap<ResourceType, String>(ResourceType.class);
    resourceInfos.put(ResourceType.MAP, "");
    resourceInfos.put(ResourceType.REDUCE, "");
    for (int i = 0; i < numNodes; i++) {
      nodes[i] = new ClusterNodeInfo(TstUtils.getNodeHost(i),
          new InetAddress(TstUtils.getNodeHost(i),
              TstUtils.getNodePort(i)),
          TstUtils.std_spec);
      nodes[i].setFree(TstUtils.std_spec);
      nodes[i].setResourceInfos(resourceInfos);
    }
    setupSessions(3);
  }

  /**
   * Creates {@code num} session descriptors, one per pool ("pool0",
   * "pool1", ...), all with NORMAL priority. Only the descriptors are built
   * here; sessions are started by the individual tests.
   *
   * @param num number of sessions to prepare
   */
  protected void setupSessions(int num) {
    numSessions = num;
    CoronaConf coronaConf = new CoronaConf(conf);
    sessionInfos = new SessionInfo[numSessions];
    handles = new String[numSessions];
    sessions = new Session[numSessions];
    for (int i = 0; i < numSessions; i++) {
      sessionInfos[i] = new SessionInfo(
          new InetAddress(sessionHost, getSessionPort(i)),
          "s_" + i, "hadoop");
      sessionInfos[i].setPriority(SessionPriority.NORMAL);
      // Route session i into its own pool so per-pool scheduling policies
      // (minimum, share) apply independently to each session.
      coronaConf.set(CoronaConf.EXPLICIT_POOL_PROPERTY, "pool" + i);
      sessionInfos[i].setPoolInfoStrings(
          PoolInfo.createPoolInfoStrings(coronaConf.getPoolInfo()));
    }
  }

  /**
   * Verifies that a pool with a configured minimum preempts exactly enough
   * MAP slots from a pool occupying the whole cluster to reach that minimum,
   * while REDUCE slots (which have no minimum) are untouched.
   */
  public void testPreemptForMinimum() throws Throwable {
    FakeConfigManager configManager = cm.getConfigManager();
    int s1MinSlots = 60;
    configManager.setMinimum(
        new PoolInfo(PoolGroupManager.DEFAULT_POOL_GROUP, "pool1"),
        ResourceType.MAP, s1MinSlots);
    configManager.setStarvingTimeForMinimum(200L);
    try {
      for (int i = 0; i < numSessions; i++) {
        handles[i] = TstUtils.startSession(cm, sessionInfos[i]);
        sessions[i] = cm.getSessionManager().getSession(handles[i]);
        TstUtils.reliableSleep(500);
      }
      int[] maps = {800, 100};
      int[] reduces = {800, 100};
      // Session 0 asks for far more than the cluster holds; before any nodes
      // exist nothing can be granted.
      submitRequests(handles[0], maps[0], reduces[0]);
      verifySession(sessions[0], ResourceType.MAP, maps[0], 0);
      verifySession(sessions[0], ResourceType.REDUCE, reduces[0], 0);
      addAllNodes();
      TstUtils.reliableSleep(1000);
      int maxMaps = cm.getNodeManager().getMaxCpuForType(ResourceType.MAP);
      int maxReduces = cm.getNodeManager().getMaxCpuForType(ResourceType.REDUCE);
      // Session 0 now owns every slot in the cluster.
      verifySession(sessions[0], ResourceType.MAP, maps[0], maxMaps);
      verifySession(sessions[0], ResourceType.REDUCE, reduces[0], maxReduces);
      // Pool1 has a minimum of 60 for M, so it preempts 60 slots
      submitRequests(handles[1], maps[1], reduces[1]);
      TstUtils.reliableSleep(SchedulerForType.PREEMPTION_PERIOD * 2);
      verifySession(sessions[0], ResourceType.MAP, maps[0],
          maxMaps - s1MinSlots, s1MinSlots);
      verifySession(sessions[0], ResourceType.REDUCE, reduces[0], maxReduces);
      verifySession(sessions[1], ResourceType.MAP, maps[1], s1MinSlots);
      verifySession(sessions[1], ResourceType.REDUCE, reduces[1], 0);
      for (int i = 0; i < numSessions; i++) {
        cm.sessionEnd(handles[i], SessionStatus.SUCCESSFUL);
      }
    } catch (InvalidSessionHandle e) {
      LOG.error("Bad Session Handle", e);
      // fail() is the idiomatic way to abort; the original
      // assertEquals("Bad Session Handle", null) only "worked" because a
      // non-null literal never equals null.
      fail("Bad Session Handle");
    } catch (Throwable t) {
      LOG.error("Unexpected exception", t);
      throw t;
    }
  }

  /**
   * Verifies that a pool starving for its fair share (starving ratio 0.5)
   * preempts half of both MAP and REDUCE slots from a pool that occupies
   * the whole cluster.
   */
  public void testPreemptForShare() throws Throwable {
    FakeConfigManager configManager = cm.getConfigManager();
    configManager.setShareStarvingRatio(0.5);
    configManager.setStarvingTimeForShare(200L);
    try {
      for (int i = 0; i < numSessions; i++) {
        handles[i] = TstUtils.startSession(cm, sessionInfos[i]);
        sessions[i] = cm.getSessionManager().getSession(handles[i]);
        TstUtils.reliableSleep(500);
      }
      int[] maps = {800, 100};
      int[] reduces = {800, 100};
      submitRequests(handles[0], maps[0], reduces[0]);
      verifySession(sessions[0], ResourceType.MAP, maps[0], 0);
      verifySession(sessions[0], ResourceType.REDUCE, reduces[0], 0);
      addAllNodes();
      TstUtils.reliableSleep(1000);
      int maxMaps = cm.getNodeManager().getMaxCpuForType(ResourceType.MAP);
      int maxReduces = cm.getNodeManager().getMaxCpuForType(ResourceType.REDUCE);
      verifySession(sessions[0], ResourceType.MAP, maps[0], maxMaps);
      verifySession(sessions[0], ResourceType.REDUCE, reduces[0], maxReduces);
      // Pool1 will starving for share. So it preempt half of M and R slots
      submitRequests(handles[1], maps[1], reduces[1]);
      TstUtils.reliableSleep(SchedulerForType.PREEMPTION_PERIOD * 2);
      verifySession(sessions[0], ResourceType.MAP, maps[0],
          maxMaps / 2, maxMaps / 2);
      verifySession(sessions[0], ResourceType.REDUCE, reduces[0],
          maxReduces / 2, maxReduces / 2);
      verifySession(sessions[1], ResourceType.MAP, maps[1], maxMaps / 2);
      verifySession(sessions[1], ResourceType.REDUCE, reduces[1], maxReduces / 2);
      for (int i = 0; i < numSessions; i++) {
        cm.sessionEnd(handles[i], SessionStatus.SUCCESSFUL);
      }
    } catch (InvalidSessionHandle e) {
      LOG.error("Bad Session Handle", e);
      // See testPreemptForMinimum: fail() replaces the always-failing
      // assertEquals("Bad Session Handle", null) idiom.
      fail("Bad Session Handle");
    } catch (Throwable t) {
      LOG.error("Unexpected exception", t);
      throw t;
    }
  }

  /**
   * Verifies that a pool with outstanding resource requests but zero
   * PENDING requests does not trigger preemption on its behalf, even when
   * it is below its share.
   */
  public void testPreemptWithDelayedRelease() throws Throwable {
    LOG.info("Starting testPreemptWithDelayedRelease");
    // Here we are testing that a pool with non-zero number of resource requests
    // but 0 pending requests does not cause pre-emption.
    FakeConfigManager configManager = cm.getConfigManager();
    NodeManager nm = cm.getNodeManager();
    configManager.setMinPreemptPeriod(100L);
    configManager.setShareStarvingRatio(0.75);
    configManager.setStarvingTimeForShare(200L);
    configManager.setStarvingTimeForMinimum(200L);
    // Create 4 sessions.
    setupSessions(4);
    for (int i = 0; i < numSessions; i++) {
      handles[i] = TstUtils.startSession(cm, sessionInfos[i]);
      sessions[i] = cm.getSessionManager().getSession(handles[i]);
      TstUtils.reliableSleep(500);
    }
    // We are only testing for maps for simplicity. Each of the sessions
    // wants to take over the whole cluster.
    int[] maps = {64, 64, 64, 64};
    int[] reduces = {0, 0, 0, 0};
    // 8 nodes => 64 total slots.
    addSomeNodes(8);
    TstUtils.reliableSleep(500);
    int maxMaps = cm.getNodeManager().getMaxCpuForType(ResourceType.MAP);
    // First session gets everything.
    verifySession(sessions[0], ResourceType.MAP, 0, 0);
    submitRequests(handles[0], maps[0], reduces[0]);
    TstUtils.reliableSleep(100);
    verifySession(sessions[0], ResourceType.MAP, maps[0], maxMaps);
    assertEquals(maxMaps, nm.getAllocatedCpuForType(ResourceType.MAP));
    // Start remaining sessions. Now we should have roughly equal usage.
    for (int i = 1; i < numSessions; i++) {
      verifySession(sessions[i], ResourceType.MAP, 0, 0);
      submitRequests(handles[i], maps[i], reduces[i]);
    }
    TstUtils.reliableSleep(SchedulerForType.PREEMPTION_PERIOD * 2);
    // We have an off-by-one difference in share, which is OK.
    verifySession(sessions[0], ResourceType.MAP, maps[0],
        maxMaps / 4 + 3, 3 * maxMaps / 4 - 3);
    for (int i = 1; i < numSessions; i++) {
      verifySession(sessions[i], ResourceType.MAP, maps[i], maxMaps / 4 - 1);
    }
    assertEquals(maxMaps, nm.getAllocatedCpuForType(ResourceType.MAP));
    assertEquals(3 * maxMaps / 4 - 3,
        sessions[0].getRevokedRequestCountForType(ResourceType.MAP));
    // End two sessions, keeping the first and the last one alive.
    cm.sessionEnd(handles[2], SessionStatus.SUCCESSFUL);
    cm.sessionEnd(handles[1], SessionStatus.SUCCESSFUL);
    TstUtils.reliableSleep(SchedulerForType.PREEMPTION_PERIOD * 2);
    // At this point, the first session will have 0 pending requests, but is
    // below its share. But we dont to preempt on its behalf, since it cannot
    // use any slots because of 0 pending.
    assertEquals(3 * maxMaps / 4 - 3,
        sessions[0].getRevokedRequestCountForType(ResourceType.MAP));
    for (int i = 1; i < numSessions; i++) {
      assertEquals("Revokes for session " + i + " are not OK", 0,
          sessions[i].getRevokedRequestCountForType(ResourceType.MAP));
    }
  }

  /**
   * Submits {@code maps} MAP and {@code reduces} REDUCE requests (spread
   * over all known nodes) on behalf of the given session handle.
   */
  private void submitRequests(String handle, int maps, int reduces)
      throws TException, InvalidSessionHandle, SafeModeException {
    List<ResourceRequest> requests =
        TstUtils.createRequests(this.numNodes, maps, reduces);
    cm.requestResource(handle, requests);
  }

  /**
   * Asserts the session's request, grant, and pending counts for a resource
   * type. Pending is derived: pending = request - grant - preempted.
   * Synchronized on the session so the three reads are mutually consistent.
   */
  private void verifySession(Session session, ResourceType type,
      int request, int grant, int preempted) {
    synchronized (session) {
      assertEquals(grant, session.getGrantCountForType(type));
      assertEquals(request, session.getRequestCountForType(type));
      assertEquals(request - grant - preempted,
          session.getPendingRequestForType(type).size());
    }
  }

  /** Convenience overload for the common case of zero preempted grants. */
  private void verifySession(Session session, ResourceType type,
      int request, int grant) {
    verifySession(session, type, request, grant, 0);
  }

  /**
   * Registers the first {@code count} fake nodes with the ClusterManager via
   * heartbeat. A heartbeat rejected while in safe mode is logged and skipped
   * (best effort); a disallowed node is a test failure and is rethrown.
   */
  private void addSomeNodes(int count) throws TException {
    for (int i = 0; i < count; i++) {
      try {
        cm.nodeHeartbeat(nodes[i]);
      } catch (DisallowedNode e) {
        throw new TException(e);
      } catch (SafeModeException e) {
        LOG.info("Cluster Manager is in Safe Mode");
      }
    }
  }

  /** Registers every fake node created in {@link #setUp()}. */
  private void addAllNodes() throws TException {
    addSomeNodes(this.numNodes);
  }
}