TestTaskLogsMonitor.java example

Explorer
hadoop-20-master
- src
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.DataOutputStream;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.TaskLog.LogFileDetail;
import org.apache.hadoop.mapred.TaskLog.LogName;
import org.apache.hadoop.mapred.lib.IdentityMapper;

import org.junit.After;
import org.junit.Test;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertEquals;

/**
 * Verify the logs' monitoring functionality.
 */
public class TestTaskLogsMonitor {

  static final Log LOG = LogFactory.getLog(TestTaskLogsMonitor.class);

  /**
   * clean-up any stale directories after enabling writable permissions for all
   * attempt-dirs.
   * 
   * @throws IOException
   */
  @After
  public void tearDown() throws IOException {
    File logDir = TaskLog.getUserLogDir();
    for (File attemptDir : logDir.listFiles()) {
      attemptDir.setWritable(true);
      FileUtil.fullyDelete(attemptDir);
    }
  }

  void writeRealBytes(TaskAttemptID firstAttemptID,
      TaskAttemptID attemptID, LogName logName, long numBytes, char data)
      throws IOException {

    File logFile = TaskLog.getTaskLogFile(firstAttemptID, logName);

    LOG.info("Going to write " + numBytes + " real bytes to the log file "
        + logFile);

    if (!logFile.getParentFile().exists()
        && !logFile.getParentFile().mkdirs()) {
      throw new IOException("Couldn't create all ancestor dirs for "
          + logFile);
    }

    File attemptDir = TaskLog.getBaseDir(attemptID.toString());
    if (!attemptDir.exists() && !attemptDir.mkdirs()) {
      throw new IOException("Couldn't create all ancestor dirs for "
          + logFile);
    }

    // Need to call up front to set currenttaskid.
    TaskLog.syncLogs(firstAttemptID, attemptID);

    FileWriter writer = new FileWriter(logFile, true);
    for (long i = 0; i < numBytes; i++) {
      writer.write(data);
    }
    writer.close();
    TaskLog.syncLogs(firstAttemptID, attemptID);
    LOG.info("Written " + numBytes + " real bytes to the log file "
        + logFile);
  }

  private static Map<LogName, Long> getAllLogsFileLengths(
      TaskAttemptID tid, boolean isCleanup) throws IOException {
    Map<LogName, Long> allLogsFileLengths = new HashMap<LogName, Long>();

    // If the index file doesn't exist, we cannot get log-file lengths. So set
    // them to zero.
    if (!TaskLog.getIndexFile(tid.toString(), isCleanup).exists()) {
      for (LogName log : LogName.values()) {
        allLogsFileLengths.put(log, Long.valueOf(0));
      }
      return allLogsFileLengths;
    }

    Map<LogName, LogFileDetail> logFilesDetails =
        TaskLog.getAllLogsFileDetails(tid, isCleanup);
    for (LogName log : logFilesDetails.keySet()) {
      allLogsFileLengths.put(log,
          Long.valueOf(logFilesDetails.get(log).length));
    }
    return allLogsFileLengths;
  }

  /**
   * Test cases which don't need any truncation of log-files. Without JVM-reuse.
   * 
   * @throws IOException
   */
  @Test
  public void testNoTruncationNeeded() throws IOException {
    TaskTracker taskTracker = new TaskTracker();
    TaskLogsMonitor logsMonitor = new TaskLogsMonitor(1000L, 1000L);
    taskTracker.setTaskLogsMonitor(logsMonitor);

    TaskID baseId = new TaskID();
    int taskcount = 0;

    TaskAttemptID attemptID = new TaskAttemptID(baseId, taskcount++);
    Task task = new MapTask(null, attemptID, 0, null, null, 0, null);

    // Let the tasks write logs within retain-size
    writeRealBytes(attemptID, attemptID, LogName.SYSLOG, 500, 'H');

    logsMonitor.monitorTaskLogs();
    File attemptDir = TaskLog.getBaseDir(attemptID.toString());
    assertTrue(attemptDir + " doesn't exist!", attemptDir.exists());

    // Finish the task and the JVM too.
    logsMonitor.addProcessForLogTruncation(attemptID, Arrays.asList(task));

    // There should be no truncation of the log-file.
    logsMonitor.monitorTaskLogs();
    assertTrue(attemptDir.exists());
    File logFile = TaskLog.getTaskLogFile(attemptID, LogName.SYSLOG);
    assertEquals(500, logFile.length());
    // The index file should also be proper.
    assertEquals(500, getAllLogsFileLengths(attemptID, false).get(
        LogName.SYSLOG).longValue());

    logsMonitor.monitorTaskLogs();
    assertEquals(500, logFile.length());
  }

  /**
   * Test the disabling of truncation of log-file.
   * 
   * @throws IOException
   */
  @Test
  public void testDisabledLogTruncation() throws IOException {
    TaskTracker taskTracker = new TaskTracker();
    // Anything less than 0 disables the truncation.
    TaskLogsMonitor logsMonitor = new TaskLogsMonitor(-1L, -1L);
    taskTracker.setTaskLogsMonitor(logsMonitor);

    TaskID baseId = new TaskID();
    int taskcount = 0;

    TaskAttemptID attemptID = new TaskAttemptID(baseId, taskcount++);
    Task task = new MapTask(null, attemptID, 0, null, null, 0, null);

    // Let the tasks write some logs
    writeRealBytes(attemptID, attemptID, LogName.SYSLOG, 1500, 'H');

    logsMonitor.monitorTaskLogs();
    File attemptDir = TaskLog.getBaseDir(attemptID.toString());
    assertTrue(attemptDir + " doesn't exist!", attemptDir.exists());

    // Finish the task and the JVM too.
    logsMonitor.addProcessForLogTruncation(attemptID, Arrays.asList(task));

    // The log-file should not be truncated.
    logsMonitor.monitorTaskLogs();
    assertTrue(attemptDir.exists());
    File logFile = TaskLog.getTaskLogFile(attemptID, LogName.SYSLOG);
    assertEquals(1500, logFile.length());
    // The index file should also be proper.
    assertEquals(1500, getAllLogsFileLengths(attemptID, false).get(
        LogName.SYSLOG).longValue());
  }

  /**
   * Test the truncation of log-file when JVMs are not reused.
   * 
   * @throws IOException
   */
  @Test
  public void testLogTruncationOnFinishing() throws IOException {
    TaskTracker taskTracker = new TaskTracker();
    TaskLogsMonitor logsMonitor = new TaskLogsMonitor(1000L, 1000L);
    taskTracker.setTaskLogsMonitor(logsMonitor);

    TaskID baseId = new TaskID();
    int taskcount = 0;

    TaskAttemptID attemptID = new TaskAttemptID(baseId, taskcount++);
    Task task = new MapTask(null, attemptID, 0, null, null, 0, null);

    // Let the tasks write logs more than retain-size
    writeRealBytes(attemptID, attemptID, LogName.SYSLOG, 1500, 'H');

    logsMonitor.monitorTaskLogs();
    File attemptDir = TaskLog.getBaseDir(attemptID.toString());
    assertTrue(attemptDir + " doesn't exist!", attemptDir.exists());

    // Finish the task and the JVM too.
    logsMonitor.addProcessForLogTruncation(attemptID, Arrays.asList(task));

    // The log-file should now be truncated.
    logsMonitor.monitorTaskLogs();
    assertTrue(attemptDir.exists());
    File logFile = TaskLog.getTaskLogFile(attemptID, LogName.SYSLOG);
    assertEquals(1000, logFile.length());
    // The index file should also be proper.
    assertEquals(1000, getAllLogsFileLengths(attemptID, false).get(
        LogName.SYSLOG).longValue());

    logsMonitor.monitorTaskLogs();
    assertEquals(1000, logFile.length());
  }

  /**
   * Test the truncation of log-file when JVM-reuse is enabled.
   * 
   * @throws IOException
   */
  @Test
  public void testLogTruncationOnFinishingWithJVMReuse() throws IOException {
    TaskTracker taskTracker = new TaskTracker();
    TaskLogsMonitor logsMonitor = new TaskLogsMonitor(150L, 150L);
    taskTracker.setTaskLogsMonitor(logsMonitor);

    TaskID baseTaskID = new TaskID();
    int attemptsCount = 0;

    // Assuming the job's retain size is 150
    TaskAttemptID attempt1 = new TaskAttemptID(baseTaskID, attemptsCount++);
    Task task1 = new MapTask(null, attempt1, 0, null, null, 0, null);

    // Let the tasks write logs more than retain-size
    writeRealBytes(attempt1, attempt1, LogName.SYSLOG, 200, 'A');

    logsMonitor.monitorTaskLogs();

    File attemptDir = TaskLog.getBaseDir(attempt1.toString());
    assertTrue(attemptDir + " doesn't exist!", attemptDir.exists());

    // Start another attempt in the same JVM
    TaskAttemptID attempt2 = new TaskAttemptID(baseTaskID, attemptsCount++);
    Task task2 = new MapTask(null, attempt2, 0, null, null, 0, null);
    logsMonitor.monitorTaskLogs();

    // Let attempt2 also write some logs
    writeRealBytes(attempt1, attempt2, LogName.SYSLOG, 100, 'B');
    logsMonitor.monitorTaskLogs();

    // Start yet another attempt in the same JVM
    TaskAttemptID attempt3 = new TaskAttemptID(baseTaskID, attemptsCount++);
    Task task3 = new MapTask(null, attempt3, 0, null, null, 0, null);
    logsMonitor.monitorTaskLogs();

    // Let attempt3 also write some logs
    writeRealBytes(attempt1, attempt3, LogName.SYSLOG, 225, 'C');
    logsMonitor.monitorTaskLogs();

    // Finish the JVM.
    logsMonitor.addProcessForLogTruncation(attempt1,
        Arrays.asList((new Task[] { task1, task2, task3 })));

    // The log-file should now be truncated.
    logsMonitor.monitorTaskLogs();
    assertTrue(attemptDir.exists());
    File logFile = TaskLog.getTaskLogFile(attempt1, LogName.SYSLOG);
    assertEquals(400, logFile.length());
    // The index files should also be proper.
    assertEquals(150, getAllLogsFileLengths(attempt1, false).get(
        LogName.SYSLOG).longValue());
    assertEquals(100, getAllLogsFileLengths(attempt2, false).get(
        LogName.SYSLOG).longValue());
    assertEquals(150, getAllLogsFileLengths(attempt3, false).get(
        LogName.SYSLOG).longValue());

    // assert the data.
    FileReader reader =
        new FileReader(TaskLog.getTaskLogFile(attempt1, LogName.SYSLOG));
    int ch, bytesRead = 0;
    boolean dataValid = true;
    while ((ch = reader.read()) != -1) {
      bytesRead++;
      if (bytesRead <= 150) {
        if ((char) ch != 'A') {
          LOG.warn("Truncation didn't happen properly. At "
              + (bytesRead + 1) + "th byte, expected 'A' but found "
              + (char) ch);
          dataValid = false;
        }
      } else if (bytesRead <= 250) {
        if ((char) ch != 'B') {
          LOG.warn("Truncation didn't happen properly. At "
              + (bytesRead + 1) + "th byte, expected 'B' but found "
              + (char) ch);
          dataValid = false;
        }
      } else if ((char) ch != 'C') {
        LOG.warn("Truncation didn't happen properly. At " + (bytesRead + 1)
            + "th byte, expected 'C' but found " + (char) ch);
        dataValid = false;
      }
    }
    assertTrue("Log-truncation didn't happen properly!", dataValid);

    logsMonitor.monitorTaskLogs();
    assertEquals(400, logFile.length());
  }

  private static String TEST_ROOT_DIR =
      new File(System.getProperty("test.build.data", "/tmp")).toURI().toString().replace(
          ' ', '+');

  public static class LoggingMapper<K, V> extends IdentityMapper<K, V> {

    public void map(K key, V val, OutputCollector<K, V> output,
        Reporter reporter) throws IOException {
      // Write lots of logs
      for (int i = 0; i < 1000; i++) {
        System.out.println("Lots of logs! Lots of logs! "
            + "Waiting to be truncated! Lots of logs!");
      }
      super.map(key, val, output, reporter);
    }
  }

  /**
   * Test logs monitoring with {@link MiniMRCluster}
   * 
   * @throws IOException
   */
  @Test
  public void testLogsMonitoringWithMiniMR() throws IOException {

    MiniMRCluster mr = null;
    try {
      JobConf clusterConf = new JobConf();
      clusterConf.setLong(TaskTracker.MAP_USERLOG_RETAIN_SIZE, 10000L);
      clusterConf.setLong(TaskTracker.REDUCE_USERLOG_RETAIN_SIZE, 10000L);
      mr = new MiniMRCluster(1, "file:///", 3, null, null, clusterConf);

      JobConf conf = mr.createJobConf();

      Path inDir = new Path(TEST_ROOT_DIR + "/input");
      Path outDir = new Path(TEST_ROOT_DIR + "/output");
      FileSystem fs = FileSystem.get(conf);
      if (fs.exists(outDir)) {
        fs.delete(outDir, true);
      }
      if (!fs.exists(inDir)) {
        fs.mkdirs(inDir);
      }
      String input = "The quick brown fox jumped over the lazy dog";
      DataOutputStream file = fs.create(new Path(inDir, "part-0"));
      file.writeBytes(input);
      file.close();

      conf.setInputFormat(TextInputFormat.class);
      conf.setOutputKeyClass(LongWritable.class);
      conf.setOutputValueClass(Text.class);

      FileInputFormat.setInputPaths(conf, inDir);
      FileOutputFormat.setOutputPath(conf, outDir);
      conf.setNumMapTasks(1);
      conf.setNumReduceTasks(0);
      conf.setMapperClass(LoggingMapper.class);

      RunningJob job = JobClient.runJob(conf);
      assertTrue(job.getJobState() == JobStatus.SUCCEEDED);
      for (TaskCompletionEvent tce : job.getTaskCompletionEvents(0)) {
        long length =
            TaskLog.getTaskLogFile(tce.getTaskAttemptId(),
                TaskLog.LogName.STDOUT).length();
        assertTrue("STDOUT log file length for " + tce.getTaskAttemptId()
            + " is " + length + " and not <=10000", length <= 10000);
      }
    } finally {
      if (mr != null) {
        mr.shutdown();
      }
    }
  }

  /**
   * Test the truncation of DEBUGOUT file by {@link TaskLogsMonitor}
   * @throws IOException 
   */
  @Test
  public void testDebugLogsTruncationWithMiniMR() throws IOException {

    MiniMRCluster mr = null;
    try {
      JobConf clusterConf = new JobConf();
      clusterConf.setLong(TaskTracker.MAP_USERLOG_RETAIN_SIZE, 10000L);
      clusterConf.setLong(TaskTracker.REDUCE_USERLOG_RETAIN_SIZE, 10000L);
      mr = new MiniMRCluster(1, "file:///", 3, null, null, clusterConf);

      JobConf conf = mr.createJobConf();

      Path inDir = new Path(TEST_ROOT_DIR + "/input");
      Path outDir = new Path(TEST_ROOT_DIR + "/output");
      FileSystem fs = FileSystem.get(conf);
      if (fs.exists(outDir)) {
        fs.delete(outDir, true);
      }
      if (!fs.exists(inDir)) {
        fs.mkdirs(inDir);
      }
      String input = "The quick brown fox jumped over the lazy dog";
      DataOutputStream file = fs.create(new Path(inDir, "part-0"));
      file.writeBytes(input);
      file.close();

      conf.setInputFormat(TextInputFormat.class);
      conf.setOutputKeyClass(LongWritable.class);
      conf.setOutputValueClass(Text.class);

      FileInputFormat.setInputPaths(conf, inDir);
      FileOutputFormat.setOutputPath(conf, outDir);
      conf.setNumMapTasks(1);
      conf.setMaxMapAttempts(1);
      conf.setNumReduceTasks(0);
      conf.setMapperClass(TestMiniMRMapRedDebugScript.MapClass.class);

      // copy debug script to cache from local file system.
      Path scriptPath = new Path(TEST_ROOT_DIR, "debug-script.txt");
      String debugScriptContent =
          "for ((i=0;i<1000;i++)); " + "do "
              + "echo \"Lots of logs! Lots of logs! "
              + "Waiting to be truncated! Lots of logs!\";" + "done";
      DataOutputStream scriptFile = fs.create(scriptPath);
      scriptFile.writeBytes(debugScriptContent);
      scriptFile.close();
      new File(scriptPath.toUri().getPath()).setExecutable(true);

      URI uri = scriptPath.toUri();
      DistributedCache.createSymlink(conf);
      DistributedCache.addCacheFile(uri, conf);
      conf.setMapDebugScript(scriptPath.toUri().getPath());

      RunningJob job = null;
      try {
        JobClient jc = new JobClient(conf);
        job = jc.submitJob(conf);
        try {
          jc.monitorAndPrintJob(conf, job);
        } catch (InterruptedException e) {
          //
        }
      } catch (IOException ioe) {
      } finally{
        for (TaskCompletionEvent tce : job.getTaskCompletionEvents(0)) {
          File debugOutFile =
              TaskLog.getTaskLogFile(tce.getTaskAttemptId(),
                  TaskLog.LogName.DEBUGOUT);
          if (debugOutFile.exists()) {
            long length = debugOutFile.length();
            assertTrue("DEBUGOUT log file length for "
                + tce.getTaskAttemptId() + " is " + length
                + " and not =10000", length == 10000);
          }
        }
      }
    } finally { 
      if (mr != null) {
        mr.shutdown();
      }
    }
  }
  /**
   * Test the TaskLog.cleanup() where if the number of log files exceed the limit by {@link TaskLogsMonitor}
   * @throws IOException
   * @throws InterruptedException
   */
  @Test
  public void testLogsFilesLimit() throws IOException, InterruptedException {
    JobConf conf = new JobConf();
		conf.setLong("mapred.userlog.cleanup.interval", 10000);
		conf.setInt("mapred.userlog.retain.hours", 24);
		conf.setInt("mapred.userlog.files.limit", 50);
		TaskTracker tracker = new TaskTracker();
		tracker.setConf(conf);
    File f;
    int numFiles = 200;
    for (int i = 0; i < numFiles; i++) {
      String filename = TaskLog.getUserLogDir() + File.separator +  "file_" + i;
      System.out.println(filename);
      f = new File(filename);
      f.createNewFile();
      f.setLastModified(System.currentTimeMillis() + i *60*1000);
    }
    tracker.startLogCleanupThread();
    Thread.sleep(900);

    assertEquals("TaskLog.cleanup() may not clean up half of the older logs!", 
    (numFiles + 1) / 2, TaskLog.getUserLogDir().listFiles().length);
  }
}