package org.apache.hadoop.corona;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.examples.SleepJob;
import org.apache.hadoop.mapred.CoronaJobTracker;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.CoronaFailureEvent;
import org.apache.hadoop.util.CoronaFailureEventInjector;
import org.apache.hadoop.util.ToolRunner;
import junit.framework.TestCase;
/**
 * Base class for the remote job tracker (RJT) failover tests.
 *
 * Naming rule for the test cases:
 *   test+(FailureEventString)+RJT+Fail (means the job will fail)
 *   test+(FailureEventString)+RJT+Failover+(number of failovers to do)
 *
 * This class is the parent class for all the RJT failover test classes.
 * Because of the 10 minute time limit per test case, the RJT test cases
 * are divided based on when the failover happens.
 */
public class TestRJTFailover extends TestCase {
  private static final Log LOG =
      LogFactory.getLog(TestRJTFailover.class);

  /** Mini cluster created in {@link #setUp()} and shut down in {@link #tearDown()}. */
  private MiniCoronaCluster corona = null;

  /** Injector handed to the mini cluster; failure events are queued on it per test. */
  private final CoronaFailureEventInjector rjtFailureInjector =
      new CoronaFailureEventInjector();

  /**
   * Builds a mini Corona cluster with four task trackers spread over three
   * racks and wires in the failure injector.
   *
   * @throws Exception if the parent set-up or the cluster build fails
   */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    String[] racks = "/rack-1,/rack-1,/rack-2,/rack-3".split(",");
    String[] trackers = "tracker-1,tracker-2,tracker-3,tracker-4".split(",");
    corona = new MiniCoronaCluster.Builder().
        numTaskTrackers(4).
        racks(racks).
        hosts(trackers).rjtFailureInjector(rjtFailureInjector).
        build();
  }

  /**
   * Test that only logs a message.
   * NOTE(review): presumably present so that JUnit finds at least one test
   * method in this parent class -- confirm.
   */
  public void testDummy() {
    LOG.info("Starting testDummy");
  }

  /**
   * Shuts down the mini cluster if {@link #setUp()} managed to create it.
   */
  @Override
  protected void tearDown() {
    if (corona != null) {
      corona.shutdown();
      // Drop the reference so a repeated tearDown does not shut down twice.
      corona = null;
    }
  }

  /**
   * Runs a {@link SleepJob} through {@link ToolRunner} and then pauses.
   *
   * @param conf job configuration to run with
   * @param maps value passed to SleepJob's {@code -m} option
   * @param reduces value passed to SleepJob's {@code -r} option
   * @param mt value passed to SleepJob's {@code -mt} option
   * @param rt value passed to SleepJob's {@code -rt} option
   * @throws Exception if running the job throws
   */
  protected void runSleepJob(JobConf conf, int maps, int reduces, int mt, int rt)
      throws Exception {
    String[] args = {
      "-m", String.valueOf(maps),
      "-r", String.valueOf(reduces),
      "-mt", String.valueOf(mt),
      "-rt", String.valueOf(rt)
    };
    ToolRunner.run(conf, new SleepJob(), args);
    // This sleep is here to wait for the JobTracker to go down completely
    TstUtils.reliableSleep(1000);
  }

  /**
   * Injects the given failure events, then runs a sleep job against a forced
   * remote job tracker. If the job throws, the test only passes when
   * {@code failNum} is 0.
   *
   * @param testName name used in log messages
   * @param failureEvents strings parsed with
   *        {@link CoronaFailureEvent#fromString(String)}; entries for which
   *        it returns null are skipped with a warning
   * @param maps value passed to SleepJob's {@code -m} option
   * @param reduces value passed to SleepJob's {@code -r} option
   * @param mt value passed to SleepJob's {@code -mt} option
   * @param rt value passed to SleepJob's {@code -rt} option
   * @param failNum value stored under
   *        {@link CoronaJobTracker#MAX_JT_FAILURES_CONF}
   * @throws Exception if creating the job configuration fails
   */
  protected void doTestRJTFailover(
      String testName, String [] failureEvents,
      int maps, int reduces,
      int mt, int rt,
      int failNum) throws Exception {
    LOG.info("Starting the test for " + testName);

    for (String fe : failureEvents) {
      CoronaFailureEvent event = CoronaFailureEvent.fromString(fe);
      if (event == null) {
        // Make a skipped event visible instead of silently testing nothing.
        LOG.warn("CoronaFailureEvent.fromString returned null for '" + fe
            + "', skipping it");
        continue;
      }
      rjtFailureInjector.injectFailureEvent(event);
    }

    JobConf conf = corona.createJobConf();
    conf.setBoolean("mapred.coronajobtracker.forceremote", true);
    conf.setInt(CoronaJobTracker.MAX_JT_FAILURES_CONF, failNum);

    long start = System.currentTimeMillis();
    try {
      this.runSleepJob(conf, maps, reduces, mt, rt);
      // NOTE(review): a job that completes is accepted for any failNum; the
      // original check that failNum > 0 in this case was disabled.
    } catch (Exception e) {
      LOG.info("job failed.", e);
      assertTrue("RJT failed to do failover", failNum == 0);
    } finally {
      // Log the duration even when the assertion above fails.
      long elapsed = System.currentTimeMillis() - start;
      LOG.info("Time spent for " + testName + ": " + elapsed + " ms");
    }
  }
}