TestRumenJobTraces.java example

Explorer
bigpetstore-master
- hadoop-1.2.1
  - src
- src
  - integration
    - java
      - org
        bigtop
        bigpetstore
        integration
        BigPetStoreHiveIT.java
        BigPetStoreMahoutIT.java
        BigPetStorePigIT.java
        ITUtils.java
  - main
    - java
      - org
        bigtop
        bigpetstore
        clustering
        BPSRecommnder.java
        MahoutClusterTransactionsByRegion.java
        contract
        PetStoreStatistics.java
        etl
        CrunchETL.java
        HiveViewCreator.java
        LineItem.java
        PigCSVCleaner.java
        generator
        BPSGenerator.java
        GeneratePetStoreTransactionsInputFormat.java
        PetStoreTransaction.java
        PetStoreTransactionInputSplit.java
        TransactionIteratorFactory.java
        util
        BigPetStoreConstants.java
        DeveloperTools.java
        NumericalIdUtils.java
        Pair.java
        PetStoreParseFunctions.java
        StringUtils.java
  - test
    - java
      - org
        bigtop
        bigpetstore
        docs
        TestDocs.java
        generator
        TestNumericalIdUtils.java
        TestPetStoreTransactionGeneratorJob.java
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.tools.rumen;

import java.io.BufferedInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.TimeUnit;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobHistory;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.UtilsForTests;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.tools.rumen.TraceBuilder.MyOptions;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import org.junit.Test;
import static org.junit.Assert.*;

public class TestRumenJobTraces {
  private static final Log LOG = LogFactory.getLog(TestRumenJobTraces.class);

  @Test
  public void testSmallTrace() throws Exception {
    performSingleTest("sample-job-tracker-logs.gz",
        "job-tracker-logs-topology-output", "job-tracker-logs-trace-output.gz");
  }

  @Test
  public void testTruncatedTask() throws Exception {
    performSingleTest("truncated-job-tracker-log", "truncated-topology-output",
        "truncated-trace-output");
  }

  private void performSingleTest(String jtLogName, String goldTopology,
      String goldTrace) throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    final Path rootInputDir =
        new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(
            lfs);
    final Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(
            lfs);

    final Path rootInputFile = new Path(rootInputDir, "rumen/small-trace-test");
    final Path tempDir = new Path(rootTempDir, "TestRumenJobTraces");
    lfs.delete(tempDir, true);

    final Path topologyFile = new Path(tempDir, jtLogName + "-topology.json");
    final Path traceFile = new Path(tempDir, jtLogName + "-trace.json");

    final Path inputFile = new Path(rootInputFile, jtLogName);

    System.out.println("topology result file = " + topologyFile);
    System.out.println("trace result file = " + traceFile);

    String[] args = new String[6];

    args[0] = "-v1";

    args[1] = "-write-topology";
    args[2] = topologyFile.toString();

    args[3] = "-write-job-trace";
    args[4] = traceFile.toString();

    args[5] = inputFile.toString();

    final Path topologyGoldFile = new Path(rootInputFile, goldTopology);
    final Path traceGoldFile = new Path(rootInputFile, goldTrace);

    @SuppressWarnings("deprecation")
    HadoopLogsAnalyzer analyzer = new HadoopLogsAnalyzer();
    int result = ToolRunner.run(analyzer, args);
    assertEquals("Non-zero exit", 0, result);

    TestRumenJobTraces
        .<LoggedNetworkTopology> jsonFileMatchesGold(conf, topologyFile,
            topologyGoldFile, LoggedNetworkTopology.class, "topology");
    TestRumenJobTraces.<LoggedJob> jsonFileMatchesGold(conf, traceFile,
        traceGoldFile, LoggedJob.class, "trace");
  }

  @Test
  public void testRumenViaDispatch() throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    final Path rootInputDir =
        new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(
            lfs);
    final Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(
            lfs);

    final Path rootInputPath = new Path(rootInputDir, "rumen/small-trace-test");
    final Path tempDir = new Path(rootTempDir, "TestRumenViaDispatch");
    lfs.delete(tempDir, true);

    final Path topologyPath = new Path(tempDir, "dispatch-topology.json");
    final Path tracePath = new Path(tempDir, "dispatch-trace.json");

    final Path inputPath =
        new Path(rootInputPath, "dispatch-sample-v20-jt-log.gz");

    System.out.println("topology result file = " + topologyPath);
    System.out.println("testRumenViaDispatch() trace result file = " + tracePath);

    String demuxerClassName = ConcatenatedInputFilesDemuxer.class.getName();

    String[] args =
        { "-demuxer", demuxerClassName, tracePath.toString(),
            topologyPath.toString(), inputPath.toString() };

    final Path topologyGoldFile =
        new Path(rootInputPath, "dispatch-topology-output.json.gz");
    final Path traceGoldFile =
        new Path(rootInputPath, "dispatch-trace-output.json.gz");

    Tool analyzer = new TraceBuilder();
    int result = ToolRunner.run(analyzer, args);
    assertEquals("Non-zero exit", 0, result);

    TestRumenJobTraces
        .<LoggedNetworkTopology> jsonFileMatchesGold(conf, topologyPath,
            topologyGoldFile, LoggedNetworkTopology.class, "topology");
    TestRumenJobTraces.<LoggedJob> jsonFileMatchesGold(conf, tracePath,
        traceGoldFile, LoggedJob.class, "trace");
  }

  @Test
  public void testBracketedCounters() throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    final Path rootInputDir =
        new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(
            lfs);
    final Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(
            lfs);

    final Path rootInputPath = new Path(rootInputDir, "rumen/small-trace-test");
    final Path tempDir = new Path(rootTempDir, "TestBracketedCounters");
    lfs.delete(tempDir, true);

    final Path topologyPath = new Path(tempDir, "dispatch-topology.json");
    final Path tracePath = new Path(tempDir, "dispatch-trace.json");

    final Path inputPath = new Path(rootInputPath, "counters-format-test-logs");

    System.out.println("topology result file = " + topologyPath);
    System.out.println("testBracketedCounters() trace result file = " + tracePath);

    final Path goldPath =
        new Path(rootInputPath, "counters-test-trace.json.gz");

    String[] args =
        { tracePath.toString(), topologyPath.toString(), inputPath.toString() };

    Tool analyzer = new TraceBuilder();
    int result = ToolRunner.run(analyzer, args);
    assertEquals("Non-zero exit", 0, result);

    TestRumenJobTraces.<LoggedJob> jsonFileMatchesGold(conf, tracePath,
        goldPath, LoggedJob.class, "trace");
  }

  /**
   * Verify if the obtainXXX methods of {@link ParsedJob}, {@link ParsedTask}
   * and {@link ParsedTaskAttempt} give valid info
   */
  private void validateParsedJob(ParsedJob parsedJob, int numMaps,
      int numReduces, String queueName) {
    validateParsedJobAPI(parsedJob, numMaps, numReduces, queueName);

    List<ParsedTask> maps = parsedJob.obtainMapTasks();
    for (ParsedTask task : maps) {
      validateParsedTask(task);
    }
    List<ParsedTask> reduces = parsedJob.obtainReduceTasks();
    for (ParsedTask task : reduces) {
      validateParsedTask(task);
    }
    List<ParsedTask> others = parsedJob.obtainOtherTasks();
    for (ParsedTask task : others) {
      validateParsedTask(task);
    }
  }

  /** Verify if the obtainXXX methods of {@link ParsedJob} give valid info */
  private void validateParsedJobAPI(ParsedJob parsedJob, int numMaps,
      int numReduces, String queueName) {
    LOG.info("Validating ParsedJob.obtainXXX api... for "
             + parsedJob.getJobID());
    assertNotNull("Job acls in ParsedJob is null",
                  parsedJob.obtainJobAcls());
    assertNotNull("Job conf path in ParsedJob is null",
                  parsedJob.obtainJobConfpath());
    assertEquals("Job queue in ParsedJob is wrong",
                 queueName, parsedJob.getQueue());

    assertNotNull("Map Counters in ParsedJob is null",
                  parsedJob.obtainMapCounters());
    assertNotNull("Reduce Counters in ParsedJob is null",
                  parsedJob.obtainReduceCounters());
    assertNotNull("Total Counters in ParsedJob is null",
                  parsedJob.obtainTotalCounters());

    assertNotNull("Map Tasks List in ParsedJob is null",
                  parsedJob.obtainMapTasks());
    assertNotNull("Reduce Tasks List in ParsedJob is null",
                  parsedJob.obtainReduceTasks());
    assertNotNull("Other Tasks List in ParsedJob is null",
                  parsedJob.obtainOtherTasks());

    // 1 map and 1 reduce task should be there
    assertEquals("Number of map tasks in ParsedJob is wrong",
                 numMaps, parsedJob.obtainMapTasks().size());
    assertEquals("Number of reduce tasks in ParsedJob is wrong",
                 numReduces, parsedJob.obtainReduceTasks().size(), 1);

    assertTrue("Total Counters in ParsedJob is empty",
               parsedJob.obtainTotalCounters().size() > 0);
    // Current 0.20 history files contain job-level-map-counters and
    // job-level-reduce-counters. Older 0.20 history files may not have them.
    assertTrue("Map Counters in ParsedJob is empty",
               parsedJob.obtainMapCounters().size() > 0);
    assertTrue("Reduce Counters in ParsedJob is empty",
        parsedJob.obtainReduceCounters().size() > 0);
  }

  /**
   * Verify if the obtainXXX methods of {@link ParsedTask} and
   * {@link ParsedTaskAttempt} give valid info
   */
  private void validateParsedTask(ParsedTask parsedTask) {
    validateParsedTaskAPI(parsedTask);

    List<ParsedTaskAttempt> attempts = parsedTask.obtainTaskAttempts();
    for (ParsedTaskAttempt attempt : attempts) {
      validateParsedTaskAttemptAPI(attempt);
    }
  }

  /** Verify if the obtainXXX methods of {@link ParsedTask} give valid info */
  private void validateParsedTaskAPI(ParsedTask parsedTask) {
    LOG.info("Validating ParsedTask.obtainXXX api... for "
             + parsedTask.getTaskID());
    assertNotNull("Task counters in ParsedTask is null",
                  parsedTask.obtainCounters());

    if (parsedTask.getTaskStatus()
        == Pre21JobHistoryConstants.Values.SUCCESS) {
      // task counters should not be empty
      assertTrue("Task counters in ParsedTask is empty",
                 parsedTask.obtainCounters().size() > 0);
      assertNull("Diagnostic-info is non-null for a succeeded task",
                 parsedTask.obtainDiagnosticInfo());
      assertNull("Failed-due-to-attemptId is non-null for a succeeded task",
                 parsedTask.obtainFailedDueToAttemptId());
    } else {
      assertNotNull("Diagnostic-info is non-null for a succeeded task",
                    parsedTask.obtainDiagnosticInfo());
      assertNotNull("Failed-due-to-attemptId is non-null for a succeeded task",
                    parsedTask.obtainFailedDueToAttemptId());
    }

    List<ParsedTaskAttempt> attempts = parsedTask.obtainTaskAttempts();
    assertNotNull("TaskAttempts list in ParsedTask is null", attempts);
    assertTrue("TaskAttempts list in ParsedTask is empty",
               attempts.size() > 0);    
  }

  /**
   * Verify if the obtainXXX methods of {@link ParsedTaskAttempt} give
   * valid info
   */
  private void validateParsedTaskAttemptAPI(
      ParsedTaskAttempt parsedTaskAttempt) {
    LOG.info("Validating ParsedTaskAttempt.obtainXXX api... for "
             + parsedTaskAttempt.getAttemptID());
    assertNotNull("Counters in ParsedTaskAttempt is null",
                  parsedTaskAttempt.obtainCounters());

    if (parsedTaskAttempt.getResult()
        == Pre21JobHistoryConstants.Values.SUCCESS) { 
      assertTrue("Counters in ParsedTaskAttempt is empty",
               parsedTaskAttempt.obtainCounters().size() > 0);
      assertNull("Diagnostic-info is non-null for a succeeded taskAttempt",
                 parsedTaskAttempt.obtainDiagnosticInfo());
    } else {
      assertNotNull("Diagnostic-info is non-null for a succeeded taskAttempt",
                 parsedTaskAttempt.obtainDiagnosticInfo());
    }
    assertNotNull("TrackerName in ParsedTaskAttempt is null",
                  parsedTaskAttempt.obtainTrackerName());

    assertNotNull("http-port info in ParsedTaskAttempt is null",
        parsedTaskAttempt.obtainHttpPort());
  }

  @Test
  public void testHadoop20JHParser() throws Exception {
    // Disabled
    if (true) return;

    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    boolean success = false;

    final Path rootInputDir =
        new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(
            lfs);
    final Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(
            lfs);

    final Path rootInputPath = new Path(rootInputDir, "rumen/small-trace-test");
    final Path tempDir = new Path(rootTempDir, "TestHadoop20JHParser");
    lfs.delete(tempDir, true);

    final Path inputPath = new Path(rootInputPath, "v20-single-input-log.gz");
    final Path goldPath =
        new Path(rootInputPath, "v20-single-input-log-event-classes.text.gz");

    InputStream inputLogStream =
        new PossiblyDecompressedInputStream(inputPath, conf);

    InputStream inputGoldStream =
        new PossiblyDecompressedInputStream(goldPath, conf);

    BufferedInputStream bis = new BufferedInputStream(inputLogStream);
    bis.mark(10000);
    Hadoop20JHParser parser = new Hadoop20JHParser(bis);

    final Path resultPath = new Path(tempDir, "result.text");

    System.out.println("testHadoop20JHParser sent its output to " + resultPath);

    Compressor compressor;

    FileSystem fs = resultPath.getFileSystem(conf);
    CompressionCodec codec =
        new CompressionCodecFactory(conf).getCodec(resultPath);
    OutputStream output;
    if (codec != null) {
      compressor = CodecPool.getCompressor(codec);
      output = codec.createOutputStream(fs.create(resultPath), compressor);
    } else {
      output = fs.create(resultPath);
    }

    PrintStream printStream = new PrintStream(output);

    try {
      assertEquals("Hadoop20JHParser can't parse the test file", true,
          Hadoop20JHParser.canParse(inputLogStream));

      bis.reset();

      HistoryEvent event = parser.nextEvent();

      while (event != null) {
        printStream.println(event.getClass().getCanonicalName());
        event = parser.nextEvent();
      }

      printStream.close();

      LineReader goldLines = new LineReader(inputGoldStream);
      LineReader resultLines =
          new LineReader(new PossiblyDecompressedInputStream(resultPath, conf));

      int lineNumber = 1;

      try {
        Text goldLine = new Text();
        Text resultLine = new Text();

        int goldRead = goldLines.readLine(goldLine);
        int resultRead = resultLines.readLine(resultLine);

        while (goldRead * resultRead != 0) {
          if (!goldLine.equals(resultLine)) {
            assertEquals("Type mismatch detected", goldLine, resultLine);
            break;
          }

          goldRead = goldLines.readLine(goldLine);
          resultRead = resultLines.readLine(resultLine);

          ++lineNumber;
        }

        if (goldRead != resultRead) {
          assertEquals("the " + (goldRead > resultRead ? "gold" : resultRead)
              + " file contains more text at line " + lineNumber, goldRead,
              resultRead);
        }

        success = true;
      } finally {
        goldLines.close();
        resultLines.close();

        if (success) {
          lfs.delete(resultPath, false);
        }
      }

    } finally {
      if (parser == null) {
        inputLogStream.close();
      } else {
        if (parser != null) {
          parser.close();
        }
      }

      if (inputGoldStream != null) {
        inputGoldStream.close();
      }

      // it's okay to do this twice [if we get an error on input]
      printStream.close();
    }
  }

  /**
   * Tests if {@link TraceBuilder} can correctly identify and parse jobhistory
   * filenames. The testcase checks if {@link TraceBuilder}
   *   - correctly identifies a jobhistory filename
   *   - correctly parses a jobhistory filename to extract out the jobid
   *   - correctly identifies a job-configuration filename stored along with the 
   *     jobhistory files
   */
  @Test
  public void testJobHistoryFilenameParsing() throws IOException {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);
    String user = "test";
    org.apache.hadoop.mapred.JobID jid = 
      new org.apache.hadoop.mapred.JobID("12345", 1);
    final Path rootInputDir =
      new Path(System.getProperty("test.tools.input.dir", ""))
      .makeQualified(lfs);
    
    // Check if jobhistory filename are detected properly
    Path jhFilename = new Path(jid + "_1234_user_jobname");
    JobID extractedJID = 
      JobID.forName(TraceBuilder.extractJobID(jhFilename.getName()));
    assertEquals("TraceBuilder failed to parse the current JH filename", 
                 jid, extractedJID);
    
    // Check if the conf filename in jobhistory are detected properly
    Path jhConfFilename = new Path(jid + "_conf.xml");
    assertTrue("TraceBuilder failed to parse the current JH conf filename", 
               TraceBuilder.isJobConfXml(jhConfFilename.getName(), null));
  }

  /**
   * Check if processing of input arguments is as expected by passing globbed
   * input path
   * <li> without -recursive option and
   * <li> with -recursive option.
   */
  @Test
  public void testProcessInputArgument() throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    // define the test's root temporary directory
    final Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
    // define the test's root input directory
    Path testRootInputDir = new Path(rootTempDir, "TestProcessInputArgument");
    // define the nested input directory
    Path nestedInputDir = new Path(testRootInputDir, "1/2/3/4");
    // define the globbed version of the nested input directory
    Path globbedInputNestedDir =
      lfs.makeQualified(new Path(testRootInputDir, "*/*/*/*/*"));
    try {
      lfs.delete(nestedInputDir, true);

      List<String> recursiveInputPaths = new ArrayList<String>();
      List<String> nonRecursiveInputPaths = new ArrayList<String>();
      // Create input files under the given path with multiple levels of
      // sub directories
      createHistoryLogsHierarchy(nestedInputDir, lfs, recursiveInputPaths,
          nonRecursiveInputPaths);

      // Check the case of globbed input path and without -recursive option
      List<Path> inputs = MyOptions.processInputArgument(
                              globbedInputNestedDir.toString(), conf, false);
      validateHistoryLogPaths(inputs, nonRecursiveInputPaths);
   // Check the case of globbed input path and with -recursive option
      inputs = MyOptions.processInputArgument(
                   globbedInputNestedDir.toString(), conf, true);
      validateHistoryLogPaths(inputs, recursiveInputPaths);

    } finally {
      lfs.delete(testRootInputDir, true);
    }
  }

  /**
   * Validate if the input history log paths are as expected.
   * @param inputs  the resultant input paths to be validated
   * @param expectedHistoryFileNames  the expected input history logs
   * @throws IOException
   */
  private void validateHistoryLogPaths(List<Path> inputs,
      List<String> expectedHistoryFileNames) throws IOException {

    System.out.println("\nExpected history files are:");
    for (String historyFile : expectedHistoryFileNames) {
      System.out.println(historyFile);
    }
    System.out.println("\nResultant history files are:");
    List<String> historyLogs = new ArrayList<String>();
    for (Path p : inputs) {
      historyLogs.add(p.toUri().getPath());
      System.out.println(p.toUri().getPath());
    }

    assertEquals("Number of history logs found is different from the expected.",
        expectedHistoryFileNames.size(), inputs.size());

    // Verify if all the history logs are expected ones and they are in the
    // expected order
    assertTrue("Some of the history log files do not match the expected.",
        historyLogs.equals(expectedHistoryFileNames));
  }

  /**
   * Create history logs under the given path with multiple levels of
   * sub directories as shown below.
   * <br>
   * Create a file, an empty subdirectory and a nonempty subdirectory
   * <historyDir> under the given input path.
   * <br>
   * The subdirectory <historyDir> contains the following dir structure:
   * <br>
   * <br><historyDir>/historyFile1.txt
   * <br><historyDir>/historyFile1.gz
   * <br><historyDir>/subDir1/historyFile2.txt
   * <br><historyDir>/subDir1/historyFile2.gz
   * <br><historyDir>/subDir2/historyFile3.txt
   * <br><historyDir>/subDir2/historyFile3.gz
   * <br><historyDir>/subDir1/subDir11/historyFile4.txt
   * <br><historyDir>/subDir1/subDir11/historyFile4.gz
   * <br><historyDir>/subDir2/subDir21/
   * <br>
   * Create the lists of input paths that should be processed by TraceBuilder
   * for recursive case and non-recursive case.
   * @param nestedInputDir the input history logs directory where history files
   *                       with nested subdirectories are created
   * @param fs         FileSystem of the input paths
   * @param recursiveInputPaths input paths for recursive case
   * @param nonRecursiveInputPaths input paths for non-recursive case
   * @throws IOException
   */
  private void createHistoryLogsHierarchy(Path nestedInputDir, FileSystem fs,
      List<String> recursiveInputPaths, List<String> nonRecursiveInputPaths)
  throws IOException {
    List<Path> dirs = new ArrayList<Path>();
    // define a file in the nested test input directory
    Path inputPath1 = new Path(nestedInputDir, "historyFile.txt");
    // define an empty sub-folder in the nested test input directory
    Path emptyDir = new Path(nestedInputDir, "emptyDir");
    // define a nonempty sub-folder in the nested test input directory
    Path historyDir = new Path(nestedInputDir, "historyDir");

    fs.mkdirs(nestedInputDir);
    // Create an empty input file
    fs.createNewFile(inputPath1);
    // Create empty subdir
    fs.mkdirs(emptyDir);// let us not create any files under this dir

    fs.mkdirs(historyDir);
    dirs.add(historyDir);

    Path subDir1 = new Path(historyDir, "subDir1");
    fs.mkdirs(subDir1);
    dirs.add(subDir1);
    Path subDir2 = new Path(historyDir, "subDir2");
    fs.mkdirs(subDir2);
    dirs.add(subDir2);

    Path subDir11 = new Path(subDir1, "subDir11");
    fs.mkdirs(subDir11);
    dirs.add(subDir11);
    Path subDir21 = new Path(subDir2, "subDir21");
    fs.mkdirs(subDir21);// let us not create any files under this dir

    int i = 0;
    for (Path dir : dirs) {
      i++;
      Path gzPath = new Path(dir, "historyFile" + i + ".gz");
      Path txtPath = new Path(dir, "historyFile" + i + ".txt");
      fs.createNewFile(txtPath);
      fs.createNewFile(gzPath);
      recursiveInputPaths.add(gzPath.toUri().getPath());
      recursiveInputPaths.add(txtPath.toUri().getPath());
      if (i == 1) {
        nonRecursiveInputPaths.add(gzPath.toUri().getPath());
        nonRecursiveInputPaths.add(txtPath.toUri().getPath());
      }
    }
    recursiveInputPaths.add(inputPath1.toUri().getPath());
    nonRecursiveInputPaths.add(inputPath1.toUri().getPath());
  }

    /**
   * Test if {@link CurrentJHParser} can read events from current JH files.
   */
  @Test
  public void testCurrentJHParser() throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    final Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs);

    final Path tempDir = new Path(rootTempDir, "TestCurrentJHParser");
    lfs.delete(tempDir, true);
    
    String queueName = "testQueue";
    // Run a MR job
    // create a MR cluster
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 1);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 1);
    conf.set("mapred.queue.names", queueName);
    MiniMRCluster mrCluster = new MiniMRCluster(1, "file:///", 1, null, null, 
                                                new JobConf(conf));
    
    // run a job
    Path inDir = new Path(tempDir, "input");
    Path outDir = new Path(tempDir, "output");
    JobHistoryParser parser = null;
    RewindableInputStream ris = null;
    ArrayList<String> seenEvents = new ArrayList<String>(10);
    RunningJob rJob = null;
    
    try {
      JobConf jobConf = mrCluster.createJobConf();
      jobConf.setQueueName(queueName);
      // construct a job with 1 map and 1 reduce task.
      rJob = UtilsForTests.runJob(jobConf, inDir, outDir, 1, 
                                  1);
      rJob.waitForCompletion();
      assertTrue("Job failed", rJob.isSuccessful());
      
      JobID id = rJob.getID();

      // get the jobhistory filepath
      Path inputPath =  
        new Path(JobHistory.getHistoryFilePath(
            org.apache.hadoop.mapred.JobID.downgrade(id)));
      // wait for 10 secs for the jobhistory file to move into the done folder
      for (int i = 0; i < 100; ++i) {
        if (lfs.exists(inputPath)) {
          break;
        }
        TimeUnit.MILLISECONDS.wait(100);
      }
    
      assertTrue("Missing job history file", lfs.exists(inputPath));
    
      InputDemuxer inputDemuxer = new DefaultInputDemuxer();
      inputDemuxer.bindTo(inputPath, conf);
    
      Pair<String, InputStream> filePair = inputDemuxer.getNext();
    
      assertNotNull(filePair);
    
      ris = new RewindableInputStream(filePair.second());

      // Test if the JobHistoryParserFactory can detect the parser correctly
      parser = JobHistoryParserFactory.getParser(ris);

      // Get ParsedJob
      String jobId = TraceBuilder.extractJobID(filePair.first());
      JobBuilder builder = new JobBuilder(jobId);

      HistoryEvent e;
      while ((e = parser.nextEvent()) != null) {
        String eventString = e.getEventType().toString();
        System.out.println("event " + eventString);
        seenEvents.add(eventString);
        if (builder != null) {
          builder.process(e);
        }
      }

      ParsedJob parsedJob = builder.build();
      // validate the obtainXXX api of ParsedJob, ParsedTask and
      // ParsedTaskAttempt.
      validateParsedJob(parsedJob, 1, 1, queueName);
    } finally {
      // stop the MR cluster
      mrCluster.shutdown();
      
      if (ris != null) {
          ris.close();
      }
      if (parser != null) {
        parser.close();
      }
      
      // cleanup the filesystem
      lfs.delete(tempDir, true);
    }

    // Check against the gold standard
    System.out.println("testCurrentJHParser validating using gold std ");
    String[] goldLines = new String[] {"JOB_SUBMITTED", "JOB_PRIORITY_CHANGED",
        "JOB_STATUS_CHANGED", "JOB_INITED", "JOB_INFO_CHANGED", "TASK_STARTED",
        "SETUP_ATTEMPT_STARTED", "SETUP_ATTEMPT_FINISHED", "MAP_ATTEMPT_FINISHED",
        "TASK_UPDATED", "TASK_FINISHED", "JOB_STATUS_CHANGED", "TASK_STARTED",
        "MAP_ATTEMPT_STARTED", "MAP_ATTEMPT_FINISHED", "MAP_ATTEMPT_FINISHED",
        "TASK_UPDATED", "TASK_FINISHED", "TASK_STARTED", "REDUCE_ATTEMPT_STARTED",
        "REDUCE_ATTEMPT_FINISHED", "REDUCE_ATTEMPT_FINISHED", "TASK_UPDATED",
        "TASK_FINISHED", "TASK_STARTED", "CLEANUP_ATTEMPT_STARTED", 
        "CLEANUP_ATTEMPT_FINISHED", "MAP_ATTEMPT_FINISHED", "TASK_UPDATED",
        "TASK_FINISHED", "JOB_STATUS_CHANGED", "JOB_FINISHED"};
    
    // Check the output with gold std
    assertEquals("Size mismatch", goldLines.length, seenEvents.size());
    
    int index = 0;
    for (String goldLine : goldLines) {
      assertEquals("Content mismatch", goldLine, seenEvents.get(index++));
    }
  }
  
    /**
     * Test if the {@link JobConfigurationParser} can correctly extract out 
     * key-value pairs from the job configuration.
     */
    @Test
    public void testJobConfigurationParsing() throws Exception {
      final FileSystem lfs = FileSystem.getLocal(new Configuration());
  
      final Path rootTempDir =
          new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(
              lfs.getUri(), lfs.getWorkingDirectory());
  
      final Path tempDir = new Path(rootTempDir, "TestJobConfigurationParser");
      lfs.delete(tempDir, true);
  
      // Add some configuration parameters to the conf
      JobConf jConf = new JobConf(false);
      String key = "test.data";
      String value = "hello world";
      jConf.set(key, value);
      
      // create the job conf file
      Path jobConfPath = new Path(tempDir.toString(), "job.xml");
      lfs.delete(jobConfPath, false);
      DataOutputStream jobConfStream = lfs.create(jobConfPath);
      jConf.writeXml(jobConfStream);
      jobConfStream.close();
      
      // now read the job conf file using the job configuration parser
      Properties properties = 
        JobConfigurationParser.parse(lfs.open(jobConfPath));
      
      // check if the required parameter is loaded
      assertEquals("Total number of extracted properties (" + properties.size() 
                   + ") doesn't match the expected size of 1 ["
                   + "JobConfigurationParser]",
                   1, properties.size());
      // check if the key is present in the extracted configuration
      assertTrue("Key " + key + " is missing in the configuration extracted "
                 + "[JobConfigurationParser]",
                 properties.keySet().contains(key));
      // check if the desired property has the correct value
      assertEquals("JobConfigurationParser couldn't recover the parameters"
                   + " correctly",
                  value, properties.get(key));
      
      // Test ZombieJob
      LoggedJob job = new LoggedJob();
      job.setJobProperties(properties);
      
      ZombieJob zjob = new ZombieJob(job, null);
      Configuration zconf = zjob.getJobConf();
      // check if the required parameter is loaded
      assertEquals("ZombieJob couldn't recover the parameters correctly", 
                   value, zconf.get(key));
    }

    @Test
    public void testJobConfigurationParser() throws Exception {

      // Validate parser with old mapred config properties from
      // sample-conf-file.xml
      validateJobConfParser("sample-conf.file.xml");
    }

    private void validateJobConfParser(String confFile) throws Exception {

      final Configuration conf = new Configuration();
      final FileSystem lfs = FileSystem.getLocal(conf);

      @SuppressWarnings("deprecation")
      final Path rootInputDir =
          new Path(System.getProperty("test.tools.input.dir", ""))
              .makeQualified(lfs);

      final Path rootInputPath = new Path(rootInputDir, "rumen/small-trace-test");

      final Path inputPath = new Path(rootInputPath, confFile);

      InputStream inputConfStream =
          new PossiblyDecompressedInputStream(inputPath, conf);

      try {
        Properties props = JobConfigurationParser.parse(inputConfStream);
        inputConfStream.close();

        // Make sure that parser puts the interested properties into props1 and
        // props2 as defined by list1 and list2.
        assertEquals("Config property for job queue name is not "
            + " extracted properly.", "TheQueue",
            JobBuilder.extract(props, JobConfPropertyNames.QUEUE_NAMES
            .getCandidates(), null));
        assertEquals("Config property for job name is not "
            + " extracted properly.", "MyMRJob",
            JobBuilder.extract(props, JobConfPropertyNames.JOB_NAMES
            .getCandidates(), null));

        validateChildJavaOpts(props);

      } finally {
        inputConfStream.close();
      }
    }
    
    // Validate child java opts in properties.
    private void validateChildJavaOpts(Properties props) {
      // if old property mapred.child.java.opts is set, then extraction of all
      // the following 3 properties should give that value.
      assertEquals("mapred.child.java.opts is not extracted properly.",
          "-server -Xmx640m -Djava.net.preferIPv4Stack=true",
          JobBuilder.extract(props, JobConfPropertyNames.TASK_JAVA_OPTS_S
          .getCandidates(), null));
      assertEquals("New config property " + JobConf.MAPRED_MAP_TASK_JAVA_OPTS
          + " is not extracted properly when the old config property "
          + "mapred.child.java.opts is set.",
          "-server -Xmx640m -Djava.net.preferIPv4Stack=true",
          JobBuilder.extract(props, JobConfPropertyNames.MAP_JAVA_OPTS_S
          .getCandidates(), null));
      assertEquals("New config property " + JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS
          + " is not extracted properly when the old config property "
          + "mapred.child.java.opts is set.",
          "-server -Xmx640m -Djava.net.preferIPv4Stack=true",
          JobBuilder.extract(props, JobConfPropertyNames.REDUCE_JAVA_OPTS_S
          .getCandidates(), null));
    }

  /**
   * Test {@link ResourceUsageMetrics}.
   */
  @Test
  public void testResourceUsageMetrics() throws Exception {
    final long cpuUsage = 100;
    final long pMemUsage = 200;
    final long vMemUsage = 300;
    final long heapUsage = 400;
    
    // test ResourceUsageMetrics's setters
    ResourceUsageMetrics metrics = new ResourceUsageMetrics();
    metrics.setCumulativeCpuUsage(cpuUsage);
    metrics.setPhysicalMemoryUsage(pMemUsage);
    metrics.setVirtualMemoryUsage(vMemUsage);
    metrics.setHeapUsage(heapUsage);
    // test cpu usage value
    assertEquals("Cpu usage values mismatch via set", cpuUsage, 
                 metrics.getCumulativeCpuUsage());
    // test pMem usage value
    assertEquals("Physical memory usage values mismatch via set", pMemUsage, 
                 metrics.getPhysicalMemoryUsage());
    // test vMem usage value
    assertEquals("Virtual memory usage values mismatch via set", vMemUsage, 
                 metrics.getVirtualMemoryUsage());
    // test heap usage value
    assertEquals("Heap usage values mismatch via set", heapUsage, 
                 metrics.getHeapUsage());
    
    // test deepCompare() (pass case)
    testResourceUsageMetricViaDeepCompare(metrics, cpuUsage, vMemUsage, 
                                          pMemUsage, heapUsage, true);
    
    // test deepCompare (fail case)
    // test cpu usage mismatch
    testResourceUsageMetricViaDeepCompare(metrics, 0, vMemUsage, pMemUsage, 
                                          heapUsage, false);
    // test pMem usage mismatch
    testResourceUsageMetricViaDeepCompare(metrics, cpuUsage, vMemUsage, 0, 
                                          heapUsage, false);
    // test vMem usage mismatch
    testResourceUsageMetricViaDeepCompare(metrics, cpuUsage, 0, pMemUsage, 
                                          heapUsage, false);
    // test heap usage mismatch
    testResourceUsageMetricViaDeepCompare(metrics, cpuUsage, vMemUsage, 
                                          pMemUsage, 0, false);
    
    // define a metric with a fixed value of size()
    ResourceUsageMetrics metrics2 = new ResourceUsageMetrics() {
      @Override
      public int size() {
        return -1;
      }
    };
    metrics2.setCumulativeCpuUsage(cpuUsage);
    metrics2.setPhysicalMemoryUsage(pMemUsage);
    metrics2.setVirtualMemoryUsage(vMemUsage);
    metrics2.setHeapUsage(heapUsage);
    
    // test with size mismatch
    testResourceUsageMetricViaDeepCompare(metrics2, cpuUsage, vMemUsage, 
                                          pMemUsage, heapUsage, false);
  }
  
  // test ResourceUsageMetric's deepCompare() method
  private static void testResourceUsageMetricViaDeepCompare(
                        ResourceUsageMetrics metrics, long cpuUsage, 
                        long vMemUsage, long pMemUsage, long heapUsage,
                        boolean shouldPass) {
    ResourceUsageMetrics testMetrics = new ResourceUsageMetrics();
    testMetrics.setCumulativeCpuUsage(cpuUsage);
    testMetrics.setPhysicalMemoryUsage(pMemUsage);
    testMetrics.setVirtualMemoryUsage(vMemUsage);
    testMetrics.setHeapUsage(heapUsage);
    
    Boolean passed = null;
    try {
      metrics.deepCompare(testMetrics, new TreePath(null, "<root>"));
      passed = true;
    } catch (DeepInequalityException die) {
      passed = false;
    }
    
    assertEquals("ResourceUsageMetrics deepCompare() failed!", 
                 shouldPass, passed);
  }
  
  /**
   * Testing {@link ResourceUsageMetrics} using {@link HadoopLogsAnalyzer}.
   */
  @Test
  @SuppressWarnings("deprecation")
  public void testResourceUsageMetricsWithHadoopLogsAnalyzer() 
  throws IOException {
    Configuration conf = new Configuration();
    // get the input trace file
    Path rootInputDir =
      new Path(System.getProperty("test.tools.input.dir", ""));
    Path rootInputSubFolder = new Path(rootInputDir, "rumen/small-trace-test");
    Path traceFile = new Path(rootInputSubFolder, "v20-resource-usage-log.gz");
    
    FileSystem lfs = FileSystem.getLocal(conf);
    
    // define the root test directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"));

    // define output directory
    Path outputDir = 
      new Path(rootTempDir, "testResourceUsageMetricsWithHadoopLogsAnalyzer");
    lfs.delete(outputDir, true);
    lfs.deleteOnExit(outputDir);
    
    // run HadoopLogsAnalyzer
    HadoopLogsAnalyzer analyzer = new HadoopLogsAnalyzer();
    analyzer.setConf(conf);
    Path traceOutput = new Path(outputDir, "trace.json");
    analyzer.run(new String[] {"-write-job-trace", traceOutput.toString(), 
                               "-v1", traceFile.toString()});
    
    // test HadoopLogsAnalyzer's output w.r.t ResourceUsageMetrics
    //  get the logged job
    JsonObjectMapperParser<LoggedJob> traceParser =
      new JsonObjectMapperParser<LoggedJob>(traceOutput, LoggedJob.class, 
                                            conf);
    
    //  get the logged job from the output trace file
    LoggedJob job = traceParser.getNext();
    LoggedTaskAttempt attempt = job.getMapTasks().get(0).getAttempts().get(0);
    ResourceUsageMetrics metrics = attempt.getResourceUsageMetrics();
    
    //  test via deepCompare()
    testResourceUsageMetricViaDeepCompare(metrics, 200, 100, 75, 50, true);
  }
  
  @Test
  public void testTopologyBuilder() throws Exception {
    final TopologyBuilder subject = new TopologyBuilder();

    // currently we extract no host names from the Properties
    subject.process(new Properties());

    subject.process(new TaskAttemptFinishedEvent(TaskAttemptID
        .forName("attempt_200904211745_0003_m_000004_0"), TaskType
        .valueOf("MAP"), "STATUS", 1234567890L,
        "/194\\.6\\.134\\.64/cluster50261\\.secondleveldomain\\.com",
        "SUCCESS", null));
    subject.process(new TaskAttemptUnsuccessfulCompletionEvent(TaskAttemptID
        .forName("attempt_200904211745_0003_m_000004_1"), TaskType
        .valueOf("MAP"), "STATUS", 1234567890L,
        "/194\\.6\\.134\\.80/cluster50262\\.secondleveldomain\\.com",
        "MACHINE_EXPLODED"));
    subject.process(new TaskAttemptUnsuccessfulCompletionEvent(TaskAttemptID
        .forName("attempt_200904211745_0003_m_000004_2"), TaskType
        .valueOf("MAP"), "STATUS", 1234567890L,
        "/194\\.6\\.134\\.80/cluster50263\\.secondleveldomain\\.com",
        "MACHINE_EXPLODED"));
    subject.process(new TaskStartedEvent(TaskID
        .forName("task_200904211745_0003_m_000004"), 1234567890L, TaskType
        .valueOf("MAP"),
        "/194\\.6\\.134\\.80/cluster50263\\.secondleveldomain\\.com"));

    final LoggedNetworkTopology topology = subject.build();

    List<LoggedNetworkTopology> racks = topology.getChildren();

    assertEquals("Wrong number of racks", 2, racks.size());

    boolean sawSingleton = false;
    boolean sawDoubleton = false;

    for (LoggedNetworkTopology rack : racks) {
      List<LoggedNetworkTopology> nodes = rack.getChildren();
      if (rack.getName().endsWith(".64")) {
        assertEquals("The singleton rack has the wrong number of elements", 1,
            nodes.size());
        sawSingleton = true;
      } else if (rack.getName().endsWith(".80")) {
        assertEquals("The doubleton rack has the wrong number of elements", 2,
            nodes.size());
        sawDoubleton = true;
      } else {
        assertTrue("Unrecognized rack name", false);
      }
    }

    assertTrue("Did not see singleton rack", sawSingleton);
    assertTrue("Did not see doubleton rack", sawDoubleton);
  }

  static private <T extends DeepCompare> void jsonFileMatchesGold(
      Configuration conf, Path result, Path gold, Class<? extends T> clazz,
      String fileDescription) throws IOException {
    JsonObjectMapperParser<T> goldParser =
        new JsonObjectMapperParser<T>(gold, clazz, conf);
    JsonObjectMapperParser<T> resultParser =
        new JsonObjectMapperParser<T>(result, clazz, conf);
    try {
      while (true) {
        DeepCompare goldJob = goldParser.getNext();
        DeepCompare resultJob = resultParser.getNext();
        if ((goldJob == null) || (resultJob == null)) {
          assertTrue(goldJob == resultJob);
          break;
        }

        try {
          resultJob.deepCompare(goldJob, new TreePath(null, "<root>"));
        } catch (DeepInequalityException e) {
          String error = e.path.toString();

          assertFalse(fileDescription + " mismatches: " + error, true);
        }
      }
    } finally {
      IOUtils.cleanup(null, goldParser, resultParser);
    }
  }
}