/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.examples.SleepJob;
import org.apache.hadoop.util.MemoryCalculatorPlugin;
import org.apache.hadoop.util.ProcfsBasedProcessTree;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.TestProcfsBasedProcessTree;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.fs.FileSystem;

import junit.framework.TestCase;

/**
 * Test class to verify memory management of tasks.
 */
public class TestTaskTrackerMemoryManager extends TestCase {

  private static final Log LOG =
      LogFactory.getLog(TestTaskTrackerMemoryManager.class);
  private static String TEST_ROOT_DIR = new Path(System.getProperty(
      "test.build.data", "/tmp")).toString().replace(' ', '+');

  private MiniMRCluster miniMRCluster;

  private String taskOverLimitPatternString =
      "TaskTree \\[pid=[0-9]*,tipID=.*\\] is running beyond memory-limits. "
          + "Current usage : [0-9]*bytes. Limit : %sbytes. Killing task.";

  private void startCluster(JobConf conf) throws Exception {
    conf.set("mapred.job.tracker.handler.count", "1");
    conf.set("mapred.tasktracker.map.tasks.maximum", "1");
    conf.set("mapred.tasktracker.reduce.tasks.maximum", "1");
    conf.set("mapred.tasktracker.tasks.sleeptime-before-sigkill", "0");
    miniMRCluster = new MiniMRCluster(1, "file:///", 1, null, null, conf);
  }

  @Override
  protected void tearDown() {
    if (miniMRCluster != null) {
      miniMRCluster.shutdown();
    }
  }

  private void runSleepJob(JobConf conf) throws Exception {
    String[] args = { "-m", "3", "-r", "1", "-mt", "3000", "-rt", "1000" };
    ToolRunner.run(conf, new SleepJob(), args);
  }

  private void runAndCheckSuccessfulJob(JobConf conf) throws IOException {
    Pattern taskOverLimitPattern =
        Pattern.compile(String.format(taskOverLimitPatternString, "[0-9]*"));
    Matcher mat = null;

    // Start the job.
    boolean success = true;
    try {
      runSleepJob(conf);
      success = true;
    } catch (Exception e) {
      success = false;
    }

    // Job has to succeed
    assertTrue(success);

    JobClient jClient = new JobClient(conf);
    JobStatus[] jStatus = jClient.getAllJobs();
    JobStatus js = jStatus[0]; // Our only job
    RunningJob rj = jClient.getJob(js.getJobID());

    // All events
    TaskCompletionEvent[] taskComplEvents = rj.getTaskCompletionEvents(0);

    for (TaskCompletionEvent tce : taskComplEvents) {
      String[] diagnostics = rj.getTaskDiagnostics(tce.getTaskAttemptId());
      if (diagnostics != null) {
        for (String str : diagnostics) {
          mat = taskOverLimitPattern.matcher(str);
          // The error pattern shouldn't be there in any TIP's diagnostics
          assertFalse(mat.find());
        }
      }
    }
  }

  private boolean isProcfsBasedTreeAvailable() {
    try {
      if (!ProcfsBasedProcessTree.isAvailable()) {
        LOG.info("Currently ProcessTree has only one implementation "
            + "ProcfsBasedProcessTree, which is not available on this "
            + "system. Not testing");
        return false;
      }
    } catch (Exception e) {
      LOG.info(StringUtils.stringifyException(e));
      return false;
    }
    return true;
  }

  /**
   * Test for verifying that nothing is killed when memory management is
   * disabled on the TT, even when the tasks run over their limits.
   *
   * @throws Exception
   */
  public void testTTLimitsDisabled() throws Exception {
    // Run the test only if the procfs-based process tree is available
    if (!isProcfsBasedTreeAvailable()) {
      return;
    }

    // Task-memory management is disabled by default.
    startCluster(new JobConf());
    long PER_TASK_LIMIT = 1L; // Doesn't matter how low.
    JobConf conf = miniMRCluster.createJobConf();
    conf.setMemoryForMapTask(PER_TASK_LIMIT);
    conf.setMemoryForReduceTask(PER_TASK_LIMIT);
    runAndCheckSuccessfulJob(conf);
  }

  /**
   * Test for verifying that tasks within their limits, with the cumulative
   * usage also under the TT's limits, succeed.
   *
   * @throws Exception
   */
  public void testTasksWithinLimits() throws Exception {
    // Run the test only if the procfs-based process tree is available
    if (!isProcfsBasedTreeAvailable()) {
      return;
    }

    // Large so that the sleep job goes through and fits within the total TT
    // usage.
    long PER_TASK_LIMIT = 2 * 1024L;

    // Start cluster with proper configuration.
    JobConf fConf = new JobConf();
    fConf.setLong(JobTracker.MAPRED_CLUSTER_MAP_MEMORY_MB_PROPERTY,
        2 * 1024L);
    fConf.setLong(JobTracker.MAPRED_CLUSTER_REDUCE_MEMORY_MB_PROPERTY,
        2 * 1024L);
    // Start the cluster with the configuration that carries the TT limits.
    startCluster(fConf);

    JobConf conf = new JobConf(miniMRCluster.createJobConf());
    conf.setMemoryForMapTask(PER_TASK_LIMIT);
    conf.setMemoryForReduceTask(PER_TASK_LIMIT);
    runAndCheckSuccessfulJob(conf);
  }

  /**
   * Test for verifying that tasks that go beyond limits get killed.
   *
   * @throws Exception
   */
  public void testTasksBeyondLimits() throws Exception {
    // Run the test only if the procfs-based process tree is available
    if (!isProcfsBasedTreeAvailable()) {
      return;
    }

    // Start cluster with proper configuration.
    JobConf fConf = new JobConf();
    // very small value, so that no task escapes to successful completion.
    fConf.set("mapred.tasktracker.taskmemorymanager.monitoring-interval",
        String.valueOf(300));
    fConf.setLong(JobTracker.MAPRED_CLUSTER_MAP_MEMORY_MB_PROPERTY,
        2 * 1024);
    fConf.setLong(JobTracker.MAPRED_CLUSTER_REDUCE_MEMORY_MB_PROPERTY,
        2 * 1024);
    startCluster(fConf);
    runJobExceedingMemoryLimit();
  }

  /**
   * Runs tests with tasks beyond their limit, using the old (deprecated)
   * configuration values for the TaskTracker.
   *
   * @throws Exception
   */
  public void testTaskMemoryMonitoringWithDeprecatedConfiguration()
      throws Exception {
    // Run the test only if the procfs-based process tree is available
    if (!isProcfsBasedTreeAvailable()) {
      return;
    }

    // Start cluster with proper configuration.
    JobConf fConf = new JobConf();
    // very small value, so that no task escapes to successful completion.
    fConf.set("mapred.tasktracker.taskmemorymanager.monitoring-interval",
        String.valueOf(300));
    // Set the old values: the maximum vmem per task and the upper limit on
    // the tasks' vmem. The default maximum vmem per task is set to 2 GB.
    fConf.setLong(JobConf.MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY,
        2L * 1024L * 1024L * 1024L);
    fConf.setLong(JobConf.UPPER_LIMIT_ON_TASK_VMEM_PROPERTY,
        3L * 1024L * 1024L * 1024L);
    startCluster(fConf);
    runJobExceedingMemoryLimit();
  }

  /**
   * Runs a job which should be failed by the memory monitor.
   *
   * @throws IOException
   */
  private void runJobExceedingMemoryLimit() throws IOException {
    long PER_TASK_LIMIT = 1L; // Low enough to kill off sleepJob tasks.

    Pattern taskOverLimitPattern =
        Pattern.compile(String.format(taskOverLimitPatternString,
            String.valueOf(PER_TASK_LIMIT * 1024 * 1024L)));
    Matcher mat = null;

    // Set up job.
    JobConf conf = new JobConf(miniMRCluster.createJobConf());
    conf.setMemoryForMapTask(PER_TASK_LIMIT);
    conf.setMemoryForReduceTask(PER_TASK_LIMIT);
    conf.setMaxMapAttempts(1);
    conf.setMaxReduceAttempts(1);

    // Start the job.
    boolean success = true;
    try {
      runSleepJob(conf);
      success = true;
    } catch (Exception e) {
      success = false;
    }

    // Job has to fail
    assertFalse(success);

    JobClient jClient = new JobClient(conf);
    JobStatus[] jStatus = jClient.getAllJobs();
    JobStatus js = jStatus[0]; // Our only job
    RunningJob rj = jClient.getJob(js.getJobID());

    // All events
    TaskCompletionEvent[] taskComplEvents = rj.getTaskCompletionEvents(0);

    for (TaskCompletionEvent tce : taskComplEvents) {
      // Every task HAS to fail
      assertTrue(tce.getTaskStatus() == TaskCompletionEvent.Status.TIPFAILED
          || tce.getTaskStatus() == TaskCompletionEvent.Status.FAILED);

      String[] diagnostics = rj.getTaskDiagnostics(tce.getTaskAttemptId());

      // Every task HAS to spit out the out-of-memory errors
      assertNotNull(diagnostics);

      for (String str : diagnostics) {
        mat = taskOverLimitPattern.matcher(str);
        // Every task HAS to spit out the out-of-memory errors in the same
        // format. And these are the only diagnostic messages.
        assertTrue(mat.find());
      }
    }
  }

  /**
   * Test for verifying that tasks causing cumulative usage to go beyond TT's
   * limit get killed even though they all are under individual limits. Memory
   * management for tasks with disabled task-limits also traverses the same
   * code-path, so we don't need a separate testTaskLimitsDisabled.
   *
   * @throws Exception
   */
  public void testTasksCumulativelyExceedingTTLimits() throws Exception {
    // Run the test only if the procfs-based process tree is available
    if (!isProcfsBasedTreeAvailable()) {
      return;
    }

    // Large enough for SleepJob tasks.
    long PER_TASK_LIMIT = 100 * 1024L;

    // Start cluster with proper configuration.
    JobConf fConf = new JobConf();
    fConf.setLong(JobTracker.MAPRED_CLUSTER_MAP_MEMORY_MB_PROPERTY, 1L);
    fConf.setLong(JobTracker.MAPRED_CLUSTER_REDUCE_MEMORY_MB_PROPERTY, 1L);

    // Because of the above, the total TT limit is 2 MB.
    long TASK_TRACKER_LIMIT = 2 * 1024 * 1024L;

    // very small value, so that no task escapes to successful completion.
    fConf.set("mapred.tasktracker.taskmemorymanager.monitoring-interval",
        String.valueOf(300));

    startCluster(fConf);

    Pattern taskOverLimitPattern =
        Pattern.compile(String.format(taskOverLimitPatternString,
            String.valueOf(PER_TASK_LIMIT)));

    Pattern trackerOverLimitPattern =
        Pattern.compile("Killing one of the least progress tasks - .*, as "
            + "the cumulative memory usage of all the tasks on the TaskTracker"
            + " host0.foo.com exceeds virtual memory limit "
            + TASK_TRACKER_LIMIT + ".");
    Matcher mat = null;

    // Set up job.
    JobConf conf = new JobConf(miniMRCluster.createJobConf());
    conf.setMemoryForMapTask(PER_TASK_LIMIT);
    conf.setMemoryForReduceTask(PER_TASK_LIMIT);

    JobClient jClient = new JobClient(conf);
    SleepJob sleepJob = new SleepJob();
    sleepJob.setConf(conf);
    // Start the job
    RunningJob job =
        jClient.submitJob(sleepJob.setupJobConf(1, 1, 5000, 1, 1000, 1));
    boolean TTOverFlowMsgPresent = false;
    while (true) {
      List<TaskReport> allTaskReports = new ArrayList<TaskReport>();
      allTaskReports.addAll(Arrays.asList(jClient
          .getSetupTaskReports((org.apache.hadoop.mapred.JobID) job.getID())));
      allTaskReports.addAll(Arrays.asList(jClient
          .getMapTaskReports((org.apache.hadoop.mapred.JobID) job.getID())));
      for (TaskReport tr : allTaskReports) {
        String[] diag = tr.getDiagnostics();
        for (String str : diag) {
          mat = taskOverLimitPattern.matcher(str);
          assertFalse(mat.find());
          mat = trackerOverLimitPattern.matcher(str);
          if (mat.find()) {
            TTOverFlowMsgPresent = true;
          }
        }
      }
      if (TTOverFlowMsgPresent) {
        break;
      }
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        // nothing
      }
    }
    // If it comes here without a test-timeout, it means there was a task that
    // was killed because of crossing cumulative TT limit.

    // Test succeeded, kill the job.
    job.killJob();
  }

  /**
   * Test to verify the check for whether a process tree is over limit or not.
   *
   * @throws IOException if there was a problem setting up the fake procfs
   *           directories or files.
   */
  public void testProcessTreeLimits() throws IOException {
    // set up a dummy proc file system
    File procfsRootDir = new File(TEST_ROOT_DIR, "proc");
    String[] pids = { "100", "200", "300", "400", "500", "600", "700" };

    try {
      TestProcfsBasedProcessTree.setupProcfsRootDir(procfsRootDir);

      // create pid dirs.
      TestProcfsBasedProcessTree.setupPidDirs(procfsRootDir, pids);

      // create process infos.
      TestProcfsBasedProcessTree.ProcessStatInfo[] procs =
          new TestProcfsBasedProcessTree.ProcessStatInfo[7];

      // assume pids 100, 500 are in 1 tree
      // 200,300,400 are in another
      // 600,700 are in a third
      procs[0] = new TestProcfsBasedProcessTree.ProcessStatInfo(
          new String[] { "100", "proc1", "1", "100", "100", "100000" });
      procs[1] = new TestProcfsBasedProcessTree.ProcessStatInfo(
          new String[] { "200", "proc2", "1", "200", "200", "200000" });
      procs[2] = new TestProcfsBasedProcessTree.ProcessStatInfo(
          new String[] { "300", "proc3", "200", "200", "200", "300000" });
      procs[3] = new TestProcfsBasedProcessTree.ProcessStatInfo(
          new String[] { "400", "proc4", "200", "200", "200", "400000" });
      procs[4] = new TestProcfsBasedProcessTree.ProcessStatInfo(
          new String[] { "500", "proc5", "100", "100", "100", "1500000" });
      procs[5] = new TestProcfsBasedProcessTree.ProcessStatInfo(
          new String[] { "600", "proc6", "1", "600", "600", "100000" });
      procs[6] = new TestProcfsBasedProcessTree.ProcessStatInfo(
          new String[] { "700", "proc7", "600", "600", "600", "100000" });

      // write stat files.
      TestProcfsBasedProcessTree.writeStatFiles(procfsRootDir, pids, procs);

      // vmem limit
      long limit = 700000;

      // Create TaskMemoryMonitorThread
      TaskMemoryManagerThread test = new TaskMemoryManagerThread(1000000L,
          5000L);

      // create process trees
      // tree rooted at 100 is over limit immediately, as it is
      // twice over the mem limit.
      ProcfsBasedProcessTree pTree = new ProcfsBasedProcessTree(
          "100", procfsRootDir.getAbsolutePath());
      pTree.getProcessTree();
      assertTrue("tree rooted at 100 should be over limit "
          + "after first iteration.",
          test.isProcessTreeOverLimit(pTree, "dummyId", limit));

      // the tree rooted at 200 is initially below limit.
      pTree = new ProcfsBasedProcessTree("200",
          procfsRootDir.getAbsolutePath());
      pTree.getProcessTree();
      assertFalse("tree rooted at 200 shouldn't be over limit "
          + "after one iteration.",
          test.isProcessTreeOverLimit(pTree, "dummyId", limit));

      // second iteration - now the tree has been over limit twice,
      // hence it should be declared over limit.
      pTree.getProcessTree();
      assertTrue("tree rooted at 200 should be over limit after 2 iterations",
          test.isProcessTreeOverLimit(pTree, "dummyId", limit));

      // the tree rooted at 600 is never over limit.
      pTree = new ProcfsBasedProcessTree("600",
          procfsRootDir.getAbsolutePath());
      pTree.getProcessTree();
      assertFalse("tree rooted at 600 should never be over limit.",
          test.isProcessTreeOverLimit(pTree, "dummyId", limit));

      // another iteration does not make any difference.
      pTree.getProcessTree();
      assertFalse("tree rooted at 600 should never be over limit.",
          test.isProcessTreeOverLimit(pTree, "dummyId", limit));
    } finally {
      FileUtil.fullyDelete(procfsRootDir);
    }
  }
}