/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import java.io.IOException; import junit.framework.TestCase; import org.apache.hadoop.mapred.FakeObjectUtilities.FakeJobInProgress; import org.apache.hadoop.mapred.FakeObjectUtilities.FakeJobTracker; import org.apache.hadoop.mapred.UtilsForTests.FakeClock; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig; /** * A test to verify JobTracker's resilience to lost task trackers. * */ @SuppressWarnings("deprecation") public class TestLostTracker extends TestCase { FakeJobInProgress job; static FakeJobTracker jobTracker; static FakeClock clock; static String trackers[] = new String[] {"tracker_tracker1:1000", "tracker_tracker2:1000"}; @Override protected void setUp() throws Exception { JobConf conf = new JobConf(); conf.set(JTConfig.JT_IPC_ADDRESS, "localhost:0"); conf.set(JTConfig.JT_HTTP_ADDRESS, "0.0.0.0:0"); conf.setLong(JTConfig.JT_TRACKER_EXPIRY_INTERVAL, 1000); conf.set(JTConfig.JT_MAX_TRACKER_BLACKLISTS, "1"); jobTracker = new FakeJobTracker(conf, (clock = new FakeClock()), trackers); jobTracker.startExpireTrackersThread(); } @Override protected void tearDown() throws Exception { jobTracker.stopExpireTrackersThread(); } public void testLostTracker() throws IOException { // Tracker 0 contacts JT FakeObjectUtilities.establishFirstContact(jobTracker, trackers[0]); TaskAttemptID[] tid = new TaskAttemptID[2]; JobConf conf = new JobConf(); conf.setNumMapTasks(1); conf.setNumReduceTasks(1); FakeJobInProgress job = new FakeJobInProgress(conf, jobTracker); job.initTasks(); // Tracker 0 gets the map task tid[0] = job.findMapTask(trackers[0]); job.finishTask(tid[0]); // Advance clock. Tracker 0 would have got lost clock.advance(8 * 1000); jobTracker.checkExpiredTrackers(); // Tracker 1 establishes contact with JT FakeObjectUtilities.establishFirstContact(jobTracker, trackers[1]); // Tracker1 should get assigned the lost map task tid[1] = job.findMapTask(trackers[1]); assertNotNull("Map Task from Lost Tracker did not get reassigned", tid[1]); assertEquals("Task ID of reassigned map task does not match", tid[0].getTaskID().toString(), tid[1].getTaskID().toString()); job.finishTask(tid[1]); } /** * Test whether the tracker gets blacklisted after its lost. */ public void testLostTrackerBeforeBlacklisting() throws Exception { FakeObjectUtilities.establishFirstContact(jobTracker, trackers[0]); TaskAttemptID[] tid = new TaskAttemptID[3]; JobConf conf = new JobConf(); conf.setNumMapTasks(1); conf.setNumReduceTasks(1); conf.set(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, "1"); conf.set(MRJobConfig.SETUP_CLEANUP_NEEDED, "false"); FakeJobInProgress job = new FakeJobInProgress(conf, jobTracker); job.initTasks(); job.setClusterSize(4); // Tracker 0 gets the map task tid[0] = job.findMapTask(trackers[0]); job.finishTask(tid[0]); // validate the total tracker count assertEquals("Active tracker count mismatch", 1, jobTracker.getClusterStatus(false).getTaskTrackers()); // lose the tracker clock.advance(1100); jobTracker.checkExpiredTrackers(); assertFalse("Tracker 0 not lost", jobTracker.getClusterStatus(false).getActiveTrackerNames() .contains(trackers[0])); // validate the total tracker count assertEquals("Active tracker count mismatch", 0, jobTracker.getClusterStatus(false).getTaskTrackers()); // Tracker 1 establishes contact with JT FakeObjectUtilities.establishFirstContact(jobTracker, trackers[1]); // Tracker1 should get assigned the lost map task tid[1] = job.findMapTask(trackers[1]); assertNotNull("Map Task from Lost Tracker did not get reassigned", tid[1]); assertEquals("Task ID of reassigned map task does not match", tid[0].getTaskID().toString(), tid[1].getTaskID().toString()); // finish the map task job.finishTask(tid[1]); // finish the reduce task tid[2] = job.findReduceTask(trackers[1]); job.finishTask(tid[2]); // check if job is successful assertEquals("Job not successful", JobStatus.SUCCEEDED, job.getStatus().getRunState()); // check if the tracker is lost // validate the total tracker count assertEquals("Active tracker count mismatch", 1, jobTracker.getClusterStatus(false).getTaskTrackers()); // validate blacklisted count .. since we lost one blacklisted tracker assertEquals("Blacklisted tracker count mismatch", 0, jobTracker.getClusterStatus(false).getBlacklistedTrackers()); } /** * Test whether the tracker gets lost after its blacklisted. */ public void testLostTrackerAfterBlacklisting() throws Exception { FakeObjectUtilities.establishFirstContact(jobTracker, trackers[0]); clock.advance(600); TaskAttemptID[] tid = new TaskAttemptID[2]; JobConf conf = new JobConf(); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); conf.set(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, "1"); conf.set(MRJobConfig.SETUP_CLEANUP_NEEDED, "false"); FakeJobInProgress job = new FakeJobInProgress(conf, jobTracker); job.initTasks(); job.setClusterSize(4); // check if the tracker count is correct assertEquals("Active tracker count mismatch", 1, jobTracker.taskTrackers().size()); // Tracker 0 gets the map task tid[0] = job.findMapTask(trackers[0]); // Fail the task job.failTask(tid[0]); // Tracker 1 establishes contact with JT FakeObjectUtilities.establishFirstContact(jobTracker, trackers[1]); // check if the tracker count is correct assertEquals("Active tracker count mismatch", 2, jobTracker.taskTrackers().size()); // Tracker 1 gets the map task tid[1] = job.findMapTask(trackers[1]); // Finish the task and also the job job.finishTask(tid[1]); // check if job is successful assertEquals("Job not successful", JobStatus.SUCCEEDED, job.getStatus().getRunState()); // check if the trackers 1 got blacklisted assertTrue("Tracker 0 not blacklisted", jobTracker.getBlacklistedTrackers()[0].getTaskTrackerName() .equals(trackers[0])); // check if the tracker count is correct assertEquals("Active tracker count mismatch", 2, jobTracker.taskTrackers().size()); // validate blacklisted count assertEquals("Blacklisted tracker count mismatch", 1, jobTracker.getClusterStatus(false).getBlacklistedTrackers()); // Advance clock. Tracker 0 should be lost clock.advance(500); jobTracker.checkExpiredTrackers(); // check if the task tracker is lost assertFalse("Tracker 0 not lost", jobTracker.getClusterStatus(false).getActiveTrackerNames() .contains(trackers[0])); // check if the lost tracker has removed from the jobtracker assertEquals("Active tracker count mismatch", 1, jobTracker.taskTrackers().size()); // validate blacklisted count assertEquals("Blacklisted tracker count mismatch", 0, jobTracker.getClusterStatus(false).getBlacklistedTrackers()); } }