/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.rumen;
import java.io.BufferedInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobHistory;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.UtilsForTests;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.tools.rumen.TraceBuilder.MyOptions;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Ignore;
import org.junit.Test;
import static org.junit.Assert.*;
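/**
 * Tests for the Rumen log-parsing and trace-building tools: the job history
 * parsers, {@link TraceBuilder}, {@link HadoopLogsAnalyzer}, and the JSON
 * trace and topology output they produce.
 */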
public class TestRumenJobTraces {
private static final Log LOG = LogFactory.getLog(TestRumenJobTraces.class);
@Test
public void testSmallTrace() throws Exception {
performSingleTest("sample-job-tracker-logs.gz",
"job-tracker-logs-topology-output", "job-tracker-logs-trace-output.gz");
}
@Test
public void testTruncatedTask() throws Exception {
performSingleTest("truncated-job-tracker-log", "truncated-topology-output",
"truncated-trace-output");
}
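  /**
   * Run {@link HadoopLogsAnalyzer} on the given job tracker log and compare
   * the generated topology and trace JSON files against the gold files.
   * The analyzer is invoked with arguments of the form:
   * <pre>{@code
   *   -v1 -write-topology <topology-file> -write-job-trace <trace-file> <input>
   * }</pre>
   */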
private void performSingleTest(String jtLogName, String goldTopology,
String goldTrace) throws Exception {
final Configuration conf = new Configuration();
final FileSystem lfs = FileSystem.getLocal(conf);
final Path rootInputDir =
new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(
lfs);
final Path rootTempDir =
new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(
lfs);
final Path rootInputFile = new Path(rootInputDir, "rumen/small-trace-test");
final Path tempDir = new Path(rootTempDir, "TestRumenJobTraces");
lfs.delete(tempDir, true);
final Path topologyFile = new Path(tempDir, jtLogName + "-topology.json");
final Path traceFile = new Path(tempDir, jtLogName + "-trace.json");
final Path inputFile = new Path(rootInputFile, jtLogName);
System.out.println("topology result file = " + topologyFile);
System.out.println("trace result file = " + traceFile);
String[] args = new String[6];
args[0] = "-v1";
args[1] = "-write-topology";
args[2] = topologyFile.toString();
args[3] = "-write-job-trace";
args[4] = traceFile.toString();
args[5] = inputFile.toString();
final Path topologyGoldFile = new Path(rootInputFile, goldTopology);
final Path traceGoldFile = new Path(rootInputFile, goldTrace);
@SuppressWarnings("deprecation")
HadoopLogsAnalyzer analyzer = new HadoopLogsAnalyzer();
int result = ToolRunner.run(analyzer, args);
assertEquals("Non-zero exit", 0, result);
TestRumenJobTraces
.<LoggedNetworkTopology> jsonFileMatchesGold(conf, topologyFile,
topologyGoldFile, LoggedNetworkTopology.class, "topology");
TestRumenJobTraces.<LoggedJob> jsonFileMatchesGold(conf, traceFile,
traceGoldFile, LoggedJob.class, "trace");
}
@Test
public void testRumenViaDispatch() throws Exception {
final Configuration conf = new Configuration();
final FileSystem lfs = FileSystem.getLocal(conf);
final Path rootInputDir =
new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(
lfs);
final Path rootTempDir =
new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(
lfs);
final Path rootInputPath = new Path(rootInputDir, "rumen/small-trace-test");
final Path tempDir = new Path(rootTempDir, "TestRumenViaDispatch");
lfs.delete(tempDir, true);
final Path topologyPath = new Path(tempDir, "dispatch-topology.json");
final Path tracePath = new Path(tempDir, "dispatch-trace.json");
final Path inputPath =
new Path(rootInputPath, "dispatch-sample-v20-jt-log.gz");
System.out.println("topology result file = " + topologyPath);
System.out.println("testRumenViaDispatch() trace result file = " + tracePath);
String demuxerClassName = ConcatenatedInputFilesDemuxer.class.getName();
String[] args =
{ "-demuxer", demuxerClassName, tracePath.toString(),
topologyPath.toString(), inputPath.toString() };
final Path topologyGoldFile =
new Path(rootInputPath, "dispatch-topology-output.json.gz");
final Path traceGoldFile =
new Path(rootInputPath, "dispatch-trace-output.json.gz");
Tool analyzer = new TraceBuilder();
int result = ToolRunner.run(analyzer, args);
assertEquals("Non-zero exit", 0, result);
TestRumenJobTraces
.<LoggedNetworkTopology> jsonFileMatchesGold(conf, topologyPath,
topologyGoldFile, LoggedNetworkTopology.class, "topology");
TestRumenJobTraces.<LoggedJob> jsonFileMatchesGold(conf, tracePath,
traceGoldFile, LoggedJob.class, "trace");
}
@Test
public void testBracketedCounters() throws Exception {
final Configuration conf = new Configuration();
final FileSystem lfs = FileSystem.getLocal(conf);
final Path rootInputDir =
new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(
lfs);
final Path rootTempDir =
new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(
lfs);
final Path rootInputPath = new Path(rootInputDir, "rumen/small-trace-test");
final Path tempDir = new Path(rootTempDir, "TestBracketedCounters");
lfs.delete(tempDir, true);
final Path topologyPath = new Path(tempDir, "dispatch-topology.json");
final Path tracePath = new Path(tempDir, "dispatch-trace.json");
final Path inputPath = new Path(rootInputPath, "counters-format-test-logs");
System.out.println("topology result file = " + topologyPath);
System.out.println("testBracketedCounters() trace result file = " + tracePath);
final Path goldPath =
new Path(rootInputPath, "counters-test-trace.json.gz");
String[] args =
{ tracePath.toString(), topologyPath.toString(), inputPath.toString() };
Tool analyzer = new TraceBuilder();
int result = ToolRunner.run(analyzer, args);
assertEquals("Non-zero exit", 0, result);
TestRumenJobTraces.<LoggedJob> jsonFileMatchesGold(conf, tracePath,
goldPath, LoggedJob.class, "trace");
}
  /**
   * Verify that the obtainXXX methods of {@link ParsedJob}, {@link ParsedTask}
   * and {@link ParsedTaskAttempt} return valid info
   */
private void validateParsedJob(ParsedJob parsedJob, int numMaps,
int numReduces, String queueName) {
validateParsedJobAPI(parsedJob, numMaps, numReduces, queueName);
List<ParsedTask> maps = parsedJob.obtainMapTasks();
for (ParsedTask task : maps) {
validateParsedTask(task);
}
List<ParsedTask> reduces = parsedJob.obtainReduceTasks();
for (ParsedTask task : reduces) {
validateParsedTask(task);
}
List<ParsedTask> others = parsedJob.obtainOtherTasks();
for (ParsedTask task : others) {
validateParsedTask(task);
}
}
  /** Verify that the obtainXXX methods of {@link ParsedJob} return valid info */
private void validateParsedJobAPI(ParsedJob parsedJob, int numMaps,
int numReduces, String queueName) {
LOG.info("Validating ParsedJob.obtainXXX api... for "
+ parsedJob.getJobID());
assertNotNull("Job acls in ParsedJob is null",
parsedJob.obtainJobAcls());
assertNotNull("Job conf path in ParsedJob is null",
parsedJob.obtainJobConfpath());
assertEquals("Job queue in ParsedJob is wrong",
queueName, parsedJob.getQueue());
assertNotNull("Map Counters in ParsedJob is null",
parsedJob.obtainMapCounters());
assertNotNull("Reduce Counters in ParsedJob is null",
parsedJob.obtainReduceCounters());
assertNotNull("Total Counters in ParsedJob is null",
parsedJob.obtainTotalCounters());
assertNotNull("Map Tasks List in ParsedJob is null",
parsedJob.obtainMapTasks());
assertNotNull("Reduce Tasks List in ParsedJob is null",
parsedJob.obtainReduceTasks());
assertNotNull("Other Tasks List in ParsedJob is null",
parsedJob.obtainOtherTasks());
    // the expected numbers of map and reduce tasks should be seen
    assertEquals("Number of map tasks in ParsedJob is wrong",
        numMaps, parsedJob.obtainMapTasks().size());
    assertEquals("Number of reduce tasks in ParsedJob is wrong",
        numReduces, parsedJob.obtainReduceTasks().size());
assertTrue("Total Counters in ParsedJob is empty",
parsedJob.obtainTotalCounters().size() > 0);
// Current 0.20 history files contain job-level-map-counters and
// job-level-reduce-counters. Older 0.20 history files may not have them.
assertTrue("Map Counters in ParsedJob is empty",
parsedJob.obtainMapCounters().size() > 0);
assertTrue("Reduce Counters in ParsedJob is empty",
parsedJob.obtainReduceCounters().size() > 0);
}
  /**
   * Verify that the obtainXXX methods of {@link ParsedTask} and
   * {@link ParsedTaskAttempt} return valid info
   */
private void validateParsedTask(ParsedTask parsedTask) {
validateParsedTaskAPI(parsedTask);
List<ParsedTaskAttempt> attempts = parsedTask.obtainTaskAttempts();
for (ParsedTaskAttempt attempt : attempts) {
validateParsedTaskAttemptAPI(attempt);
}
}
  /** Verify that the obtainXXX methods of {@link ParsedTask} return valid info */
private void validateParsedTaskAPI(ParsedTask parsedTask) {
LOG.info("Validating ParsedTask.obtainXXX api... for "
+ parsedTask.getTaskID());
assertNotNull("Task counters in ParsedTask is null",
parsedTask.obtainCounters());
if (parsedTask.getTaskStatus()
== Pre21JobHistoryConstants.Values.SUCCESS) {
// task counters should not be empty
assertTrue("Task counters in ParsedTask is empty",
parsedTask.obtainCounters().size() > 0);
assertNull("Diagnostic-info is non-null for a succeeded task",
parsedTask.obtainDiagnosticInfo());
assertNull("Failed-due-to-attemptId is non-null for a succeeded task",
parsedTask.obtainFailedDueToAttemptId());
    } else {
      assertNotNull("Diagnostic-info is null for a failed task",
          parsedTask.obtainDiagnosticInfo());
      assertNotNull("Failed-due-to-attemptId is null for a failed task",
          parsedTask.obtainFailedDueToAttemptId());
}
List<ParsedTaskAttempt> attempts = parsedTask.obtainTaskAttempts();
assertNotNull("TaskAttempts list in ParsedTask is null", attempts);
assertTrue("TaskAttempts list in ParsedTask is empty",
attempts.size() > 0);
}
  /**
   * Verify that the obtainXXX methods of {@link ParsedTaskAttempt} return
   * valid info
   */
private void validateParsedTaskAttemptAPI(
ParsedTaskAttempt parsedTaskAttempt) {
LOG.info("Validating ParsedTaskAttempt.obtainXXX api... for "
+ parsedTaskAttempt.getAttemptID());
assertNotNull("Counters in ParsedTaskAttempt is null",
parsedTaskAttempt.obtainCounters());
if (parsedTaskAttempt.getResult()
== Pre21JobHistoryConstants.Values.SUCCESS) {
assertTrue("Counters in ParsedTaskAttempt is empty",
parsedTaskAttempt.obtainCounters().size() > 0);
assertNull("Diagnostic-info is non-null for a succeeded taskAttempt",
parsedTaskAttempt.obtainDiagnosticInfo());
    } else {
      assertNotNull("Diagnostic-info is null for a failed taskAttempt",
          parsedTaskAttempt.obtainDiagnosticInfo());
}
assertNotNull("TrackerName in ParsedTaskAttempt is null",
parsedTaskAttempt.obtainTrackerName());
assertNotNull("http-port info in ParsedTaskAttempt is null",
parsedTaskAttempt.obtainHttpPort());
}
  @Ignore // this test is disabled
  @Test
  public void testHadoop20JHParser() throws Exception {
final Configuration conf = new Configuration();
final FileSystem lfs = FileSystem.getLocal(conf);
boolean success = false;
final Path rootInputDir =
new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(
lfs);
final Path rootTempDir =
new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(
lfs);
final Path rootInputPath = new Path(rootInputDir, "rumen/small-trace-test");
final Path tempDir = new Path(rootTempDir, "TestHadoop20JHParser");
lfs.delete(tempDir, true);
final Path inputPath = new Path(rootInputPath, "v20-single-input-log.gz");
final Path goldPath =
new Path(rootInputPath, "v20-single-input-log-event-classes.text.gz");
InputStream inputLogStream =
new PossiblyDecompressedInputStream(inputPath, conf);
InputStream inputGoldStream =
new PossiblyDecompressedInputStream(goldPath, conf);
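    // Buffer and mark the log stream so that the bytes consumed by
    // canParse() below can be rewound for the parser via bis.reset().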
BufferedInputStream bis = new BufferedInputStream(inputLogStream);
bis.mark(10000);
Hadoop20JHParser parser = new Hadoop20JHParser(bis);
final Path resultPath = new Path(tempDir, "result.text");
System.out.println("testHadoop20JHParser sent its output to " + resultPath);
Compressor compressor;
FileSystem fs = resultPath.getFileSystem(conf);
CompressionCodec codec =
new CompressionCodecFactory(conf).getCodec(resultPath);
OutputStream output;
if (codec != null) {
compressor = CodecPool.getCompressor(codec);
output = codec.createOutputStream(fs.create(resultPath), compressor);
} else {
output = fs.create(resultPath);
}
PrintStream printStream = new PrintStream(output);
try {
assertEquals("Hadoop20JHParser can't parse the test file", true,
Hadoop20JHParser.canParse(inputLogStream));
bis.reset();
HistoryEvent event = parser.nextEvent();
while (event != null) {
printStream.println(event.getClass().getCanonicalName());
event = parser.nextEvent();
}
printStream.close();
LineReader goldLines = new LineReader(inputGoldStream);
LineReader resultLines =
new LineReader(new PossiblyDecompressedInputStream(resultPath, conf));
int lineNumber = 1;
try {
Text goldLine = new Text();
Text resultLine = new Text();
int goldRead = goldLines.readLine(goldLine);
int resultRead = resultLines.readLine(resultLine);
while (goldRead * resultRead != 0) {
if (!goldLine.equals(resultLine)) {
assertEquals("Type mismatch detected", goldLine, resultLine);
break;
}
goldRead = goldLines.readLine(goldLine);
resultRead = resultLines.readLine(resultLine);
++lineNumber;
}
if (goldRead != resultRead) {
assertEquals("the " + (goldRead > resultRead ? "gold" : resultRead)
+ " file contains more text at line " + lineNumber, goldRead,
resultRead);
}
success = true;
} finally {
goldLines.close();
resultLines.close();
if (success) {
lfs.delete(resultPath, false);
}
}
    } finally {
      if (parser == null) {
        inputLogStream.close();
      } else {
        parser.close();
      }
if (inputGoldStream != null) {
inputGoldStream.close();
}
// it's okay to do this twice [if we get an error on input]
printStream.close();
}
}
/**
* Tests if {@link TraceBuilder} can correctly identify and parse jobhistory
   * filenames. The testcase checks if {@link TraceBuilder}
   * <li> correctly identifies a jobhistory filename,
   * <li> correctly parses a jobhistory filename to extract the jobid, and
   * <li> correctly identifies a job-configuration filename stored along with
   * the jobhistory files.
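   * <br>
   * For example, with the ids used in the test body, a history filename like
   * {@code job_12345_0001_1234_test_jobname} yields the jobid
   * {@code job_12345_0001}, and {@code job_12345_0001_conf.xml} is
   * recognized as the matching job configuration filename.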
*/
@Test
public void testJobHistoryFilenameParsing() throws IOException {
final Configuration conf = new Configuration();
final FileSystem lfs = FileSystem.getLocal(conf);
String user = "test";
org.apache.hadoop.mapred.JobID jid =
new org.apache.hadoop.mapred.JobID("12345", 1);
final Path rootInputDir =
new Path(System.getProperty("test.tools.input.dir", ""))
.makeQualified(lfs);
// Check if jobhistory filename are detected properly
Path jhFilename = new Path(jid + "_1234_user_jobname");
JobID extractedJID =
JobID.forName(TraceBuilder.extractJobID(jhFilename.getName()));
assertEquals("TraceBuilder failed to parse the current JH filename",
jid, extractedJID);
// Check if the conf filename in jobhistory are detected properly
Path jhConfFilename = new Path(jid + "_conf.xml");
assertTrue("TraceBuilder failed to parse the current JH conf filename",
TraceBuilder.isJobConfXml(jhConfFilename.getName(), null));
}
/**
* Check if processing of input arguments is as expected by passing globbed
* input path
* <li> without -recursive option and
* <li> with -recursive option.
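   * <br>
   * See {@link #createHistoryLogsHierarchy} for the directory layout that
   * the globbed input path is matched against.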
*/
@Test
public void testProcessInputArgument() throws Exception {
final Configuration conf = new Configuration();
final FileSystem lfs = FileSystem.getLocal(conf);
// define the test's root temporary directory
final Path rootTempDir =
new Path(System.getProperty("test.build.data", "/tmp"))
.makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
// define the test's root input directory
Path testRootInputDir = new Path(rootTempDir, "TestProcessInputArgument");
// define the nested input directory
Path nestedInputDir = new Path(testRootInputDir, "1/2/3/4");
// define the globbed version of the nested input directory
Path globbedInputNestedDir =
lfs.makeQualified(new Path(testRootInputDir, "*/*/*/*/*"));
try {
lfs.delete(nestedInputDir, true);
List<String> recursiveInputPaths = new ArrayList<String>();
List<String> nonRecursiveInputPaths = new ArrayList<String>();
// Create input files under the given path with multiple levels of
// sub directories
createHistoryLogsHierarchy(nestedInputDir, lfs, recursiveInputPaths,
nonRecursiveInputPaths);
// Check the case of globbed input path and without -recursive option
List<Path> inputs = MyOptions.processInputArgument(
globbedInputNestedDir.toString(), conf, false);
validateHistoryLogPaths(inputs, nonRecursiveInputPaths);
// Check the case of globbed input path and with -recursive option
inputs = MyOptions.processInputArgument(
globbedInputNestedDir.toString(), conf, true);
validateHistoryLogPaths(inputs, recursiveInputPaths);
} finally {
lfs.delete(testRootInputDir, true);
}
}
/**
* Validate if the input history log paths are as expected.
* @param inputs the resultant input paths to be validated
* @param expectedHistoryFileNames the expected input history logs
* @throws IOException
*/
private void validateHistoryLogPaths(List<Path> inputs,
List<String> expectedHistoryFileNames) throws IOException {
System.out.println("\nExpected history files are:");
for (String historyFile : expectedHistoryFileNames) {
System.out.println(historyFile);
}
System.out.println("\nResultant history files are:");
List<String> historyLogs = new ArrayList<String>();
for (Path p : inputs) {
historyLogs.add(p.toUri().getPath());
System.out.println(p.toUri().getPath());
}
assertEquals("Number of history logs found is different from the expected.",
expectedHistoryFileNames.size(), inputs.size());
// Verify if all the history logs are expected ones and they are in the
// expected order
assertTrue("Some of the history log files do not match the expected.",
historyLogs.equals(expectedHistoryFileNames));
}
/**
* Create history logs under the given path with multiple levels of
* sub directories as shown below.
* <br>
* Create a file, an empty subdirectory and a nonempty subdirectory
* <historyDir> under the given input path.
* <br>
* The subdirectory <historyDir> contains the following dir structure:
* <br>
* <br><historyDir>/historyFile1.txt
* <br><historyDir>/historyFile1.gz
* <br><historyDir>/subDir1/historyFile2.txt
* <br><historyDir>/subDir1/historyFile2.gz
* <br><historyDir>/subDir2/historyFile3.txt
* <br><historyDir>/subDir2/historyFile3.gz
* <br><historyDir>/subDir1/subDir11/historyFile4.txt
* <br><historyDir>/subDir1/subDir11/historyFile4.gz
* <br><historyDir>/subDir2/subDir21/
* <br>
* Create the lists of input paths that should be processed by TraceBuilder
* for recursive case and non-recursive case.
* @param nestedInputDir the input history logs directory where history files
* with nested subdirectories are created
* @param fs FileSystem of the input paths
* @param recursiveInputPaths input paths for recursive case
* @param nonRecursiveInputPaths input paths for non-recursive case
* @throws IOException
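   * <br>
   * With this layout, the non-recursive case expects only the file directly
   * under the input path and the files directly under <historyDir>, while
   * the recursive case also expects the files in all nested subdirectories.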
*/
private void createHistoryLogsHierarchy(Path nestedInputDir, FileSystem fs,
List<String> recursiveInputPaths, List<String> nonRecursiveInputPaths)
throws IOException {
List<Path> dirs = new ArrayList<Path>();
// define a file in the nested test input directory
Path inputPath1 = new Path(nestedInputDir, "historyFile.txt");
// define an empty sub-folder in the nested test input directory
Path emptyDir = new Path(nestedInputDir, "emptyDir");
// define a nonempty sub-folder in the nested test input directory
Path historyDir = new Path(nestedInputDir, "historyDir");
fs.mkdirs(nestedInputDir);
// Create an empty input file
fs.createNewFile(inputPath1);
// Create empty subdir
fs.mkdirs(emptyDir);// let us not create any files under this dir
fs.mkdirs(historyDir);
dirs.add(historyDir);
Path subDir1 = new Path(historyDir, "subDir1");
fs.mkdirs(subDir1);
dirs.add(subDir1);
Path subDir2 = new Path(historyDir, "subDir2");
fs.mkdirs(subDir2);
dirs.add(subDir2);
Path subDir11 = new Path(subDir1, "subDir11");
fs.mkdirs(subDir11);
dirs.add(subDir11);
Path subDir21 = new Path(subDir2, "subDir21");
fs.mkdirs(subDir21);// let us not create any files under this dir
int i = 0;
for (Path dir : dirs) {
i++;
Path gzPath = new Path(dir, "historyFile" + i + ".gz");
Path txtPath = new Path(dir, "historyFile" + i + ".txt");
fs.createNewFile(txtPath);
fs.createNewFile(gzPath);
recursiveInputPaths.add(gzPath.toUri().getPath());
recursiveInputPaths.add(txtPath.toUri().getPath());
if (i == 1) {
nonRecursiveInputPaths.add(gzPath.toUri().getPath());
nonRecursiveInputPaths.add(txtPath.toUri().getPath());
}
}
recursiveInputPaths.add(inputPath1.toUri().getPath());
nonRecursiveInputPaths.add(inputPath1.toUri().getPath());
}
/**
* Test if {@link CurrentJHParser} can read events from current JH files.
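   * The test runs a 1-map, 1-reduce job on a {@link MiniMRCluster}, locates
   * the job's history file, lets {@link JobHistoryParserFactory} pick the
   * parser, and replays the events through a {@link JobBuilder}, roughly:
   * <pre>{@code
   *   JobHistoryParser parser = JobHistoryParserFactory.getParser(ris);
   *   HistoryEvent e;
   *   while ((e = parser.nextEvent()) != null) {
   *     builder.process(e);
   *   }
   * }</pre>
   * The seen event types are then checked against a gold list.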
*/
@Test
public void testCurrentJHParser() throws Exception {
final Configuration conf = new Configuration();
final FileSystem lfs = FileSystem.getLocal(conf);
final Path rootTempDir =
new Path(System.getProperty("test.build.data", "/tmp"))
.makeQualified(lfs);
final Path tempDir = new Path(rootTempDir, "TestCurrentJHParser");
lfs.delete(tempDir, true);
String queueName = "testQueue";
// Run a MR job
// create a MR cluster
conf.setInt("mapred.tasktracker.map.tasks.maximum", 1);
conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 1);
conf.set("mapred.queue.names", queueName);
MiniMRCluster mrCluster = new MiniMRCluster(1, "file:///", 1, null, null,
new JobConf(conf));
// run a job
Path inDir = new Path(tempDir, "input");
Path outDir = new Path(tempDir, "output");
JobHistoryParser parser = null;
RewindableInputStream ris = null;
ArrayList<String> seenEvents = new ArrayList<String>(10);
RunningJob rJob = null;
try {
JobConf jobConf = mrCluster.createJobConf();
jobConf.setQueueName(queueName);
// construct a job with 1 map and 1 reduce task.
rJob = UtilsForTests.runJob(jobConf, inDir, outDir, 1,
1);
rJob.waitForCompletion();
assertTrue("Job failed", rJob.isSuccessful());
JobID id = rJob.getID();
// get the jobhistory filepath
Path inputPath =
new Path(JobHistory.getHistoryFilePath(
org.apache.hadoop.mapred.JobID.downgrade(id)));
// wait for 10 secs for the jobhistory file to move into the done folder
for (int i = 0; i < 100; ++i) {
if (lfs.exists(inputPath)) {
break;
}
        TimeUnit.MILLISECONDS.sleep(100);
}
assertTrue("Missing job history file", lfs.exists(inputPath));
InputDemuxer inputDemuxer = new DefaultInputDemuxer();
inputDemuxer.bindTo(inputPath, conf);
Pair<String, InputStream> filePair = inputDemuxer.getNext();
assertNotNull(filePair);
ris = new RewindableInputStream(filePair.second());
// Test if the JobHistoryParserFactory can detect the parser correctly
parser = JobHistoryParserFactory.getParser(ris);
// Get ParsedJob
String jobId = TraceBuilder.extractJobID(filePair.first());
JobBuilder builder = new JobBuilder(jobId);
HistoryEvent e;
while ((e = parser.nextEvent()) != null) {
String eventString = e.getEventType().toString();
System.out.println("event " + eventString);
seenEvents.add(eventString);
        builder.process(e);
}
ParsedJob parsedJob = builder.build();
// validate the obtainXXX api of ParsedJob, ParsedTask and
// ParsedTaskAttempt.
validateParsedJob(parsedJob, 1, 1, queueName);
} finally {
// stop the MR cluster
mrCluster.shutdown();
if (ris != null) {
ris.close();
}
if (parser != null) {
parser.close();
}
// cleanup the filesystem
lfs.delete(tempDir, true);
}
// Check against the gold standard
System.out.println("testCurrentJHParser validating using gold std ");
String[] goldLines = new String[] {"JOB_SUBMITTED", "JOB_PRIORITY_CHANGED",
"JOB_STATUS_CHANGED", "JOB_INITED", "JOB_INFO_CHANGED", "TASK_STARTED",
"SETUP_ATTEMPT_STARTED", "SETUP_ATTEMPT_FINISHED", "MAP_ATTEMPT_FINISHED",
"TASK_UPDATED", "TASK_FINISHED", "JOB_STATUS_CHANGED", "TASK_STARTED",
"MAP_ATTEMPT_STARTED", "MAP_ATTEMPT_FINISHED", "MAP_ATTEMPT_FINISHED",
"TASK_UPDATED", "TASK_FINISHED", "TASK_STARTED", "REDUCE_ATTEMPT_STARTED",
"REDUCE_ATTEMPT_FINISHED", "REDUCE_ATTEMPT_FINISHED", "TASK_UPDATED",
"TASK_FINISHED", "TASK_STARTED", "CLEANUP_ATTEMPT_STARTED",
"CLEANUP_ATTEMPT_FINISHED", "MAP_ATTEMPT_FINISHED", "TASK_UPDATED",
"TASK_FINISHED", "JOB_STATUS_CHANGED", "JOB_FINISHED"};
// Check the output with gold std
assertEquals("Size mismatch", goldLines.length, seenEvents.size());
int index = 0;
for (String goldLine : goldLines) {
assertEquals("Content mismatch", goldLine, seenEvents.get(index++));
}
}
/**
* Test if the {@link JobConfigurationParser} can correctly extract out
* key-value pairs from the job configuration.
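   * The parser is driven the same way the test body below drives it:
   * <pre>{@code
   *   Properties properties = JobConfigurationParser.parse(lfs.open(jobConfPath));
   * }</pre>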
*/
@Test
public void testJobConfigurationParsing() throws Exception {
final FileSystem lfs = FileSystem.getLocal(new Configuration());
final Path rootTempDir =
new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(
lfs.getUri(), lfs.getWorkingDirectory());
final Path tempDir = new Path(rootTempDir, "TestJobConfigurationParser");
lfs.delete(tempDir, true);
// Add some configuration parameters to the conf
JobConf jConf = new JobConf(false);
String key = "test.data";
String value = "hello world";
jConf.set(key, value);
// create the job conf file
Path jobConfPath = new Path(tempDir.toString(), "job.xml");
lfs.delete(jobConfPath, false);
DataOutputStream jobConfStream = lfs.create(jobConfPath);
jConf.writeXml(jobConfStream);
jobConfStream.close();
// now read the job conf file using the job configuration parser
Properties properties =
JobConfigurationParser.parse(lfs.open(jobConfPath));
// check if the required parameter is loaded
assertEquals("Total number of extracted properties (" + properties.size()
+ ") doesn't match the expected size of 1 ["
+ "JobConfigurationParser]",
1, properties.size());
// check if the key is present in the extracted configuration
assertTrue("Key " + key + " is missing in the configuration extracted "
+ "[JobConfigurationParser]",
properties.keySet().contains(key));
// check if the desired property has the correct value
assertEquals("JobConfigurationParser couldn't recover the parameters"
+ " correctly",
value, properties.get(key));
// Test ZombieJob
LoggedJob job = new LoggedJob();
job.setJobProperties(properties);
ZombieJob zjob = new ZombieJob(job, null);
Configuration zconf = zjob.getJobConf();
// check if the required parameter is loaded
assertEquals("ZombieJob couldn't recover the parameters correctly",
value, zconf.get(key));
}
@Test
public void testJobConfigurationParser() throws Exception {
// Validate parser with old mapred config properties from
    // sample-conf.file.xml
validateJobConfParser("sample-conf.file.xml");
}
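  /**
   * Parse the given job conf file from the test input directory and verify
   * that the queue name, the job name and the child java opts are extracted
   * as expected.
   */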
private void validateJobConfParser(String confFile) throws Exception {
final Configuration conf = new Configuration();
final FileSystem lfs = FileSystem.getLocal(conf);
@SuppressWarnings("deprecation")
final Path rootInputDir =
new Path(System.getProperty("test.tools.input.dir", ""))
.makeQualified(lfs);
final Path rootInputPath = new Path(rootInputDir, "rumen/small-trace-test");
final Path inputPath = new Path(rootInputPath, confFile);
InputStream inputConfStream =
new PossiblyDecompressedInputStream(inputPath, conf);
try {
      Properties props = JobConfigurationParser.parse(inputConfStream);
      // Make sure that the parser extracts the properties of interest
      // (queue name, job name and child java opts) from the conf file.
      assertEquals("Config property for job queue name is not"
          + " extracted properly.", "TheQueue",
          JobBuilder.extract(props, JobConfPropertyNames.QUEUE_NAMES
          .getCandidates(), null));
      assertEquals("Config property for job name is not"
          + " extracted properly.", "MyMRJob",
          JobBuilder.extract(props, JobConfPropertyNames.JOB_NAMES
          .getCandidates(), null));
validateChildJavaOpts(props);
} finally {
inputConfStream.close();
}
}
// Validate child java opts in properties.
private void validateChildJavaOpts(Properties props) {
// if old property mapred.child.java.opts is set, then extraction of all
// the following 3 properties should give that value.
assertEquals("mapred.child.java.opts is not extracted properly.",
"-server -Xmx640m -Djava.net.preferIPv4Stack=true",
JobBuilder.extract(props, JobConfPropertyNames.TASK_JAVA_OPTS_S
.getCandidates(), null));
assertEquals("New config property " + JobConf.MAPRED_MAP_TASK_JAVA_OPTS
+ " is not extracted properly when the old config property "
+ "mapred.child.java.opts is set.",
"-server -Xmx640m -Djava.net.preferIPv4Stack=true",
JobBuilder.extract(props, JobConfPropertyNames.MAP_JAVA_OPTS_S
.getCandidates(), null));
assertEquals("New config property " + JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS
+ " is not extracted properly when the old config property "
+ "mapred.child.java.opts is set.",
"-server -Xmx640m -Djava.net.preferIPv4Stack=true",
JobBuilder.extract(props, JobConfPropertyNames.REDUCE_JAVA_OPTS_S
.getCandidates(), null));
}
/**
* Test {@link ResourceUsageMetrics}.
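   * Exercises the setters and getters, then checks deepCompare() for both
   * matching and mismatching metrics, including a size() mismatch.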
*/
@Test
public void testResourceUsageMetrics() throws Exception {
final long cpuUsage = 100;
final long pMemUsage = 200;
final long vMemUsage = 300;
final long heapUsage = 400;
// test ResourceUsageMetrics's setters
ResourceUsageMetrics metrics = new ResourceUsageMetrics();
metrics.setCumulativeCpuUsage(cpuUsage);
metrics.setPhysicalMemoryUsage(pMemUsage);
metrics.setVirtualMemoryUsage(vMemUsage);
metrics.setHeapUsage(heapUsage);
// test cpu usage value
assertEquals("Cpu usage values mismatch via set", cpuUsage,
metrics.getCumulativeCpuUsage());
// test pMem usage value
assertEquals("Physical memory usage values mismatch via set", pMemUsage,
metrics.getPhysicalMemoryUsage());
// test vMem usage value
assertEquals("Virtual memory usage values mismatch via set", vMemUsage,
metrics.getVirtualMemoryUsage());
// test heap usage value
assertEquals("Heap usage values mismatch via set", heapUsage,
metrics.getHeapUsage());
// test deepCompare() (pass case)
testResourceUsageMetricViaDeepCompare(metrics, cpuUsage, vMemUsage,
pMemUsage, heapUsage, true);
// test deepCompare (fail case)
// test cpu usage mismatch
testResourceUsageMetricViaDeepCompare(metrics, 0, vMemUsage, pMemUsage,
heapUsage, false);
// test pMem usage mismatch
testResourceUsageMetricViaDeepCompare(metrics, cpuUsage, vMemUsage, 0,
heapUsage, false);
// test vMem usage mismatch
testResourceUsageMetricViaDeepCompare(metrics, cpuUsage, 0, pMemUsage,
heapUsage, false);
// test heap usage mismatch
testResourceUsageMetricViaDeepCompare(metrics, cpuUsage, vMemUsage,
pMemUsage, 0, false);
// define a metric with a fixed value of size()
ResourceUsageMetrics metrics2 = new ResourceUsageMetrics() {
@Override
public int size() {
return -1;
}
};
metrics2.setCumulativeCpuUsage(cpuUsage);
metrics2.setPhysicalMemoryUsage(pMemUsage);
metrics2.setVirtualMemoryUsage(vMemUsage);
metrics2.setHeapUsage(heapUsage);
// test with size mismatch
testResourceUsageMetricViaDeepCompare(metrics2, cpuUsage, vMemUsage,
pMemUsage, heapUsage, false);
}
  // test ResourceUsageMetrics' deepCompare() method
private static void testResourceUsageMetricViaDeepCompare(
ResourceUsageMetrics metrics, long cpuUsage,
long vMemUsage, long pMemUsage, long heapUsage,
boolean shouldPass) {
ResourceUsageMetrics testMetrics = new ResourceUsageMetrics();
testMetrics.setCumulativeCpuUsage(cpuUsage);
testMetrics.setPhysicalMemoryUsage(pMemUsage);
testMetrics.setVirtualMemoryUsage(vMemUsage);
testMetrics.setHeapUsage(heapUsage);
Boolean passed = null;
try {
metrics.deepCompare(testMetrics, new TreePath(null, "<root>"));
passed = true;
} catch (DeepInequalityException die) {
passed = false;
}
assertEquals("ResourceUsageMetrics deepCompare() failed!",
shouldPass, passed);
}
/**
* Testing {@link ResourceUsageMetrics} using {@link HadoopLogsAnalyzer}.
*/
@Test
@SuppressWarnings("deprecation")
public void testResourceUsageMetricsWithHadoopLogsAnalyzer()
throws IOException {
Configuration conf = new Configuration();
// get the input trace file
Path rootInputDir =
new Path(System.getProperty("test.tools.input.dir", ""));
Path rootInputSubFolder = new Path(rootInputDir, "rumen/small-trace-test");
Path traceFile = new Path(rootInputSubFolder, "v20-resource-usage-log.gz");
FileSystem lfs = FileSystem.getLocal(conf);
// define the root test directory
Path rootTempDir =
new Path(System.getProperty("test.build.data", "/tmp"));
// define output directory
Path outputDir =
new Path(rootTempDir, "testResourceUsageMetricsWithHadoopLogsAnalyzer");
lfs.delete(outputDir, true);
lfs.deleteOnExit(outputDir);
// run HadoopLogsAnalyzer
HadoopLogsAnalyzer analyzer = new HadoopLogsAnalyzer();
analyzer.setConf(conf);
Path traceOutput = new Path(outputDir, "trace.json");
analyzer.run(new String[] {"-write-job-trace", traceOutput.toString(),
"-v1", traceFile.toString()});
// test HadoopLogsAnalyzer's output w.r.t ResourceUsageMetrics
// get the logged job
JsonObjectMapperParser<LoggedJob> traceParser =
new JsonObjectMapperParser<LoggedJob>(traceOutput, LoggedJob.class,
conf);
// get the logged job from the output trace file
LoggedJob job = traceParser.getNext();
LoggedTaskAttempt attempt = job.getMapTasks().get(0).getAttempts().get(0);
ResourceUsageMetrics metrics = attempt.getResourceUsageMetrics();
// test via deepCompare()
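    // (cpu=200, vMem=100, pMem=75, heap=50 are the usage values the test
    // expects the first attempt of the first map task in
    // v20-resource-usage-log.gz to carry)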
    testResourceUsageMetricViaDeepCompare(metrics, 200, 100, 75, 50, true);
    traceParser.close();
}
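  /**
   * Test if {@link TopologyBuilder} builds the expected network topology
   * from a stream of task-attempt events: two racks, one (ending in .64)
   * holding a single node and one (ending in .80) holding two nodes.
   */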
@Test
public void testTopologyBuilder() throws Exception {
final TopologyBuilder subject = new TopologyBuilder();
// currently we extract no host names from the Properties
subject.process(new Properties());
subject.process(new TaskAttemptFinishedEvent(TaskAttemptID
.forName("attempt_200904211745_0003_m_000004_0"), TaskType
.valueOf("MAP"), "STATUS", 1234567890L,
"/194\\.6\\.134\\.64/cluster50261\\.secondleveldomain\\.com",
"SUCCESS", null));
subject.process(new TaskAttemptUnsuccessfulCompletionEvent(TaskAttemptID
.forName("attempt_200904211745_0003_m_000004_1"), TaskType
.valueOf("MAP"), "STATUS", 1234567890L,
"/194\\.6\\.134\\.80/cluster50262\\.secondleveldomain\\.com",
"MACHINE_EXPLODED"));
subject.process(new TaskAttemptUnsuccessfulCompletionEvent(TaskAttemptID
.forName("attempt_200904211745_0003_m_000004_2"), TaskType
.valueOf("MAP"), "STATUS", 1234567890L,
"/194\\.6\\.134\\.80/cluster50263\\.secondleveldomain\\.com",
"MACHINE_EXPLODED"));
subject.process(new TaskStartedEvent(TaskID
.forName("task_200904211745_0003_m_000004"), 1234567890L, TaskType
.valueOf("MAP"),
"/194\\.6\\.134\\.80/cluster50263\\.secondleveldomain\\.com"));
final LoggedNetworkTopology topology = subject.build();
List<LoggedNetworkTopology> racks = topology.getChildren();
assertEquals("Wrong number of racks", 2, racks.size());
boolean sawSingleton = false;
boolean sawDoubleton = false;
for (LoggedNetworkTopology rack : racks) {
List<LoggedNetworkTopology> nodes = rack.getChildren();
if (rack.getName().endsWith(".64")) {
assertEquals("The singleton rack has the wrong number of elements", 1,
nodes.size());
sawSingleton = true;
} else if (rack.getName().endsWith(".80")) {
assertEquals("The doubleton rack has the wrong number of elements", 2,
nodes.size());
sawDoubleton = true;
} else {
assertTrue("Unrecognized rack name", false);
}
}
assertTrue("Did not see singleton rack", sawSingleton);
assertTrue("Did not see doubleton rack", sawDoubleton);
}
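  /**
   * Compare the JSON records of the result file against those of the gold
   * file one by one via {@link DeepCompare#deepCompare}, failing the test on
   * the first mismatch or when one file has more records than the other.
   */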
static private <T extends DeepCompare> void jsonFileMatchesGold(
Configuration conf, Path result, Path gold, Class<? extends T> clazz,
String fileDescription) throws IOException {
JsonObjectMapperParser<T> goldParser =
new JsonObjectMapperParser<T>(gold, clazz, conf);
JsonObjectMapperParser<T> resultParser =
new JsonObjectMapperParser<T>(result, clazz, conf);
try {
while (true) {
DeepCompare goldJob = goldParser.getNext();
DeepCompare resultJob = resultParser.getNext();
if ((goldJob == null) || (resultJob == null)) {
          assertTrue("one of the gold and result files ended before the other",
              goldJob == resultJob);
break;
}
try {
resultJob.deepCompare(goldJob, new TreePath(null, "<root>"));
} catch (DeepInequalityException e) {
String error = e.path.toString();
assertFalse(fileDescription + " mismatches: " + error, true);
}
}
} finally {
IOUtils.cleanup(null, goldParser, resultParser);
}
}
}