/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.examples.WordCount;
import org.apache.hadoop.util.ProcfsBasedProcessTree;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import junit.framework.TestCase;

/**
 * Tests that the TaskTracker's memory manager enforces per-task memory
 * limits: tasks within the limit must run to completion untouched, and
 * tasks exceeding the limit must be killed with the expected diagnostic
 * message.
 */
public class TestTaskTrackerMemoryManager extends TestCase {
  private static final Log LOG =
      LogFactory.getLog(TestTaskTrackerMemoryManager.class);

  private MiniDFSCluster miniDFSCluster;
  private MiniMRCluster miniMRCluster;

  private void startCluster(JobConf conf) throws Exception {
    miniDFSCluster = new MiniDFSCluster(conf, 1, true, null);
    FileSystem fileSys = miniDFSCluster.getFileSystem();
    String namenode = fileSys.getUri().toString();
    miniMRCluster = new MiniMRCluster(1, namenode, 1, null, null, conf);
  }

  @Override
  protected void tearDown() {
    if (miniMRCluster != null) {
      miniMRCluster.shutdown();
    }
    if (miniDFSCluster != null) {
      miniDFSCluster.shutdown();
    }
  }

  private void runWordCount(JobConf conf) throws Exception {
    Path input = new Path("input.txt");
    Path output = new Path("output");

    // Write a small input file for WordCount to process.
    OutputStream os = miniDFSCluster.getFileSystem().create(input);
    Writer wr = new OutputStreamWriter(os);
    wr.write("hello1\n");
    wr.write("hello2\n");
    wr.write("hello3\n");
    wr.write("hello4\n");
    wr.close();

    Tool wordCount = new WordCount();
    if (conf != null) {
      wordCount.setConf(conf);
    }
    ToolRunner.run(wordCount,
        new String[] { input.toString(), output.toString() });
  }

  public void testNormalTaskAndLimitedTT() throws Exception {
    // Run the test only if memory management is enabled
    try {
      if (!ProcfsBasedProcessTree.isAvailable()) {
        LOG.info("Currently ProcessTree has only one implementation "
            + "ProcfsBasedProcessTree, which is not available on this "
            + "system. Not testing");
        return;
      }
    } catch (Exception e) {
      LOG.info(StringUtils.stringifyException(e));
      return;
    }

    Pattern diagMsgPattern = Pattern
        .compile("TaskTree \\[pid=[0-9]*,tipID=.*\\] is running beyond "
            + "memory-limits. Current usage : [0-9]*kB. Limit : [0-9]*kB. "
            + "Killing task.");
    Matcher mat = null;

    // Start cluster with proper configuration.
    JobConf fConf = new JobConf();
    // Fairly large value so that WordCount can succeed.
    fConf.setLong("mapred.tasktracker.tasks.maxmemory", 10000000000L);
    startCluster(fConf);

    // Set up job.
    JobConf conf = new JobConf();
    JobTracker jt = miniMRCluster.getJobTrackerRunner().getJobTracker();
    conf.set("mapred.job.tracker",
        jt.getJobTrackerMachine() + ":" + jt.getTrackerPort());
    NameNode nn = miniDFSCluster.getNameNode();
    conf.set("fs.default.name", "hdfs://"
        + nn.getNameNodeAddress().getHostName() + ":"
        + nn.getNameNodeAddress().getPort());

    // Start the job.
    boolean success = true;
    try {
      runWordCount(conf);
      success = true;
    } catch (Exception e) {
      success = false;
    }

    // Job has to succeed
    assertTrue(success);

    // Alas, we don't have a way to get job id/task completion events
    // directly from WordCount, so go through the JobClient.
    JobClient jClient = new JobClient(conf);
    JobStatus[] jStatus = jClient.getAllJobs();
    JobStatus js = jStatus[0]; // Our only job
    RunningJob rj = jClient.getJob(js.getJobID());

    // All events
    TaskCompletionEvent[] taskComplEvents = rj.getTaskCompletionEvents(0);

    for (TaskCompletionEvent tce : taskComplEvents) {
      String[] diagnostics = jClient.jobSubmitClient.getTaskDiagnostics(tce
          .getTaskAttemptId());
      if (diagnostics != null) {
        for (String str : diagnostics) {
          mat = diagMsgPattern.matcher(str);
          // The error pattern shouldn't be there in any TIP's diagnostics
          assertFalse(mat.find());
        }
      }
    }
  }

  public void testOOMTaskAndLimitedTT() throws Exception {
    // Run the test only if memory management is enabled
    try {
      if (!ProcfsBasedProcessTree.isAvailable()) {
        LOG.info("Currently ProcessTree has only one implementation "
            + "ProcfsBasedProcessTree, which is not available on this "
            + "system. Not testing");
        return;
      }
    } catch (Exception e) {
      LOG.info(StringUtils.stringifyException(e));
      return;
    }

    long PER_TASK_LIMIT = 444; // Low enough to kill off WordCount.

    Pattern diagMsgPattern = Pattern
        .compile("TaskTree \\[pid=[0-9]*,tipID=.*\\] is running beyond "
            + "memory-limits. Current usage : [0-9]*kB. Limit : "
            + PER_TASK_LIMIT + "kB. Killing task.");
    Matcher mat = null;

    // Start cluster with proper configuration.
    JobConf fConf = new JobConf();
    fConf.setLong("mapred.tasktracker.tasks.maxmemory", 100000L);
    // Very small monitoring interval, so that no task escapes to
    // successful completion.
    fConf.set("mapred.tasktracker.taskmemorymanager.monitoring-interval",
        String.valueOf(300));
    startCluster(fConf);

    // Set up job.
    JobConf conf = new JobConf();
    conf.setMaxVirtualMemoryForTask(PER_TASK_LIMIT);
    JobTracker jt = miniMRCluster.getJobTrackerRunner().getJobTracker();
    conf.set("mapred.job.tracker",
        jt.getJobTrackerMachine() + ":" + jt.getTrackerPort());
    NameNode nn = miniDFSCluster.getNameNode();
    conf.set("fs.default.name", "hdfs://"
        + nn.getNameNodeAddress().getHostName() + ":"
        + nn.getNameNodeAddress().getPort());

    // Start the job.
    boolean success = true;
    try {
      runWordCount(conf);
      success = true;
    } catch (Exception e) {
      success = false;
    }

    // Job has to fail
    assertFalse(success);

    // Alas, we don't have a way to get job id/task completion events
    // directly from WordCount, so go through the JobClient.
    JobClient jClient = new JobClient(conf);
    JobStatus[] jStatus = jClient.getAllJobs();
    JobStatus js = jStatus[0]; // Our only job
    RunningJob rj = jClient.getJob(js.getJobID());

    // All events
    TaskCompletionEvent[] taskComplEvents = rj.getTaskCompletionEvents(0);

    for (TaskCompletionEvent tce : taskComplEvents) {
      // Every task HAS to fail
      assertTrue(tce.getTaskStatus() == TaskCompletionEvent.Status.TIPFAILED
          || tce.getTaskStatus() == TaskCompletionEvent.Status.FAILED);

      String[] diagnostics = jClient.jobSubmitClient.getTaskDiagnostics(tce
          .getTaskAttemptId());

      // Every task HAS to spit out the out-of-memory errors
      assertNotNull(diagnostics);

      for (String str : diagnostics) {
        mat = diagMsgPattern.matcher(str);
        // Every task HAS to spit out the out-of-memory errors in the same
        // format. And these are the only diagnostic messages.
        assertTrue(mat.find());
      }
    }
  }
}