/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.examples.WordCount;
import org.apache.hadoop.util.ProcfsBasedProcessTree;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import junit.framework.TestCase;

/**
 * Tests that the TaskTracker's memory manager enforces per-task memory
 * limits: tasks within the limit must run to completion untouched, and
 * tasks exceeding the limit must be killed with the expected diagnostic
 * message.
 */
public class TestTaskTrackerMemoryManager extends TestCase {
  private static final Log LOG =
      LogFactory.getLog(TestTaskTrackerMemoryManager.class);

  private MiniDFSCluster miniDFSCluster;
  private MiniMRCluster miniMRCluster;

  private void startCluster(JobConf conf) throws Exception {
    miniDFSCluster = new MiniDFSCluster(conf, 1, true, null);
    FileSystem fileSys = miniDFSCluster.getFileSystem();
    String namenode = fileSys.getUri().toString();
    miniMRCluster = new MiniMRCluster(1, namenode, 1, null, null, conf);
  }

  @Override
  protected void tearDown() {
    if (miniMRCluster != null) {
      miniMRCluster.shutdown();
    }
    if (miniDFSCluster != null) {
      miniDFSCluster.shutdown();
    }
  }

  private void runWordCount(JobConf conf) throws Exception {
    Path input = new Path("input.txt");
    Path output = new Path("output");

    // Write a small input file for WordCount to process.
    OutputStream os = miniDFSCluster.getFileSystem().create(input);
    Writer wr = new OutputStreamWriter(os);
    wr.write("hello1\n");
    wr.write("hello2\n");
    wr.write("hello3\n");
    wr.write("hello4\n");
    wr.close();

    Tool wordCount = new WordCount();
    if (conf != null) {
      wordCount.setConf(conf);
    }
    ToolRunner.run(wordCount,
        new String[] { input.toString(), output.toString() });
  }

  public void testNormalTaskAndLimitedTT() throws Exception {
    // Run the test only if memory management is enabled
    try {
      if (!ProcfsBasedProcessTree.isAvailable()) {
        LOG.info("Currently ProcessTree has only one implementation "
            + "ProcfsBasedProcessTree, which is not available on this "
            + "system. Not testing");
        return;
      }
    } catch (Exception e) {
      LOG.info(StringUtils.stringifyException(e));
      return;
    }

    Pattern diagMsgPattern = Pattern
        .compile("TaskTree \\[pid=[0-9]*,tipID=.*\\] is running beyond "
            + "memory-limits. Current usage : [0-9]*kB. Limit : [0-9]*kB. "
            + "Killing task.");
    Matcher mat = null;

    // Start cluster with proper configuration.
    JobConf fConf = new JobConf();
    // Fairly large value so that WordCount can succeed.
    fConf.setLong("mapred.tasktracker.tasks.maxmemory", 10000000000L);
    startCluster(fConf);

    // Set up job.
    JobConf conf = new JobConf();
    JobTracker jt = miniMRCluster.getJobTrackerRunner().getJobTracker();
    conf.set("mapred.job.tracker",
        jt.getJobTrackerMachine() + ":" + jt.getTrackerPort());
    NameNode nn = miniDFSCluster.getNameNode();
    conf.set("fs.default.name", "hdfs://"
        + nn.getNameNodeAddress().getHostName() + ":"
        + nn.getNameNodeAddress().getPort());

    // Start the job.
    boolean success = true;
    try {
      runWordCount(conf);
      success = true;
    } catch (Exception e) {
      success = false;
    }

    // Job has to succeed
    assertTrue(success);

    // Alas, we don't have a way to get job id/task completion events
    // directly from WordCount, so go through the JobClient.
    JobClient jClient = new JobClient(conf);
    JobStatus[] jStatus = jClient.getAllJobs();
    JobStatus js = jStatus[0]; // Our only job
    RunningJob rj = jClient.getJob(js.getJobID());

    // All events
    TaskCompletionEvent[] taskComplEvents = rj.getTaskCompletionEvents(0);

    for (TaskCompletionEvent tce : taskComplEvents) {
      String[] diagnostics = jClient.jobSubmitClient.getTaskDiagnostics(tce
          .getTaskAttemptId());
      if (diagnostics != null) {
        for (String str : diagnostics) {
          mat = diagMsgPattern.matcher(str);
          // The error pattern shouldn't be there in any TIP's diagnostics
          assertFalse(mat.find());
        }
      }
    }
  }

  public void testOOMTaskAndLimitedTT() throws Exception {
    // Run the test only if memory management is enabled
    try {
      if (!ProcfsBasedProcessTree.isAvailable()) {
        LOG.info("Currently ProcessTree has only one implementation "
            + "ProcfsBasedProcessTree, which is not available on this "
            + "system. Not testing");
        return;
      }
    } catch (Exception e) {
      LOG.info(StringUtils.stringifyException(e));
      return;
    }

    long PER_TASK_LIMIT = 444; // Low enough to kill off WordCount.

    Pattern diagMsgPattern = Pattern
        .compile("TaskTree \\[pid=[0-9]*,tipID=.*\\] is running beyond "
            + "memory-limits. Current usage : [0-9]*kB. Limit : "
            + PER_TASK_LIMIT + "kB. Killing task.");
    Matcher mat = null;

    // Start cluster with proper configuration.
    JobConf fConf = new JobConf();
    fConf.setLong("mapred.tasktracker.tasks.maxmemory", 100000L);
    // Very small monitoring interval, so that no task escapes to
    // successful completion.
    fConf.set("mapred.tasktracker.taskmemorymanager.monitoring-interval",
        String.valueOf(300));
    startCluster(fConf);

    // Set up job.
    JobConf conf = new JobConf();
    conf.setMaxVirtualMemoryForTask(PER_TASK_LIMIT);
    JobTracker jt = miniMRCluster.getJobTrackerRunner().getJobTracker();
    conf.set("mapred.job.tracker",
        jt.getJobTrackerMachine() + ":" + jt.getTrackerPort());
    NameNode nn = miniDFSCluster.getNameNode();
    conf.set("fs.default.name", "hdfs://"
        + nn.getNameNodeAddress().getHostName() + ":"
        + nn.getNameNodeAddress().getPort());

    // Start the job.
    boolean success = true;
    try {
      runWordCount(conf);
      success = true;
    } catch (Exception e) {
      success = false;
    }

    // Job has to fail
    assertFalse(success);

    // Alas, we don't have a way to get job id/task completion events
    // directly from WordCount, so go through the JobClient.
    JobClient jClient = new JobClient(conf);
    JobStatus[] jStatus = jClient.getAllJobs();
    JobStatus js = jStatus[0]; // Our only job
    RunningJob rj = jClient.getJob(js.getJobID());

    // All events
    TaskCompletionEvent[] taskComplEvents = rj.getTaskCompletionEvents(0);

    for (TaskCompletionEvent tce : taskComplEvents) {
      // Every task HAS to fail
      assertTrue(tce.getTaskStatus() == TaskCompletionEvent.Status.TIPFAILED
          || tce.getTaskStatus() == TaskCompletionEvent.Status.FAILED);

      String[] diagnostics = jClient.jobSubmitClient.getTaskDiagnostics(tce
          .getTaskAttemptId());

      // Every task HAS to spit out the out-of-memory errors
      assertNotNull(diagnostics);

      for (String str : diagnostics) {
        mat = diagMsgPattern.matcher(str);
        // Every task HAS to spit out the out-of-memory errors in the same
        // format. And these are the only diagnostic messages.
        assertTrue(mat.find());
      }
    }
  }
}