/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.streaming;

import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapred.TaskLog;
import org.apache.hadoop.mapred.TaskReport;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.MapReduceTestUtil;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;

/**
 * Tests whether a mapper/reducer with empty/nonempty input works properly
 * when reporting is done using lines like "reporter:status:" and
 * "reporter:counter:" before the map()/reduce() method is called.
 * Validates the task's STDERR log when messages are written to stderr
 * before map()/reduce() is called.
 * Also validates the job output.
 * Uses MiniMR since the local jobtracker doesn't track task status.
 */
public class TestStreamingStatus {
  protected static String TEST_ROOT_DIR =
      new File(System.getProperty("test.build.data", "/tmp"),
          TestStreamingStatus.class.getSimpleName())
          .toURI().toString().replace(' ', '+');
  protected String INPUT_FILE = TEST_ROOT_DIR + "/input.txt";
  protected String OUTPUT_DIR = TEST_ROOT_DIR + "/out";
  protected String input = "roses.are.red\nviolets.are.blue\nbunnies.are.pink\n";
  protected String map = null;
  protected String reduce = null;

  protected String scriptFile = TEST_ROOT_DIR + "/perlScript.pl";
  protected String scriptFileName = new Path(scriptFile).toUri().getPath();

  String expectedStderr = "my error msg before consuming input\n" +
      "my error msg after consuming input\n";
  String expectedOutput = null; // initialized in setUp()
  String expectedStatus = "before consuming input";

  // This script does the following:
  // (a) sets the task status before reading input
  // (b) writes to stderr before reading input and after reading input
  // (c) writes to stdout before reading input
  // (d) increments a user counter before reading input and after reading input
  // Writing lines to stdout before reading input ((c) above) validates the
  // hanging-task issue that occurs when the input to the task is empty
  // (because the output thread is not started in that case).
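  // For reference, the "reporter:" lines used below follow the Hadoop
  // Streaming stderr reporting protocol:
  //   reporter:status:<message>                    sets the task status
  //   reporter:counter:<group>,<counter>,<amount>  increments a user counter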
protected String script = "#!/usr/bin/perl\n" + "print STDERR \"reporter:status:" + expectedStatus + "\\n\";\n" + "print STDERR \"reporter:counter:myOwnCounterGroup,myOwnCounter,1\\n\";\n" + "print STDERR \"my error msg before consuming input\\n\";\n" + "for($count = 1500; $count >= 1; $count--) {print STDOUT \"$count \";}" + "while(<STDIN>) {chomp;}\n" + "print STDERR \"my error msg after consuming input\\n\";\n" + "print STDERR \"reporter:counter:myOwnCounterGroup,myOwnCounter,1\\n\";\n"; MiniMRCluster mr = null; FileSystem fs = null; JobConf conf = null; /** * Start the cluster and create input file before running the actual test. * * @throws IOException */ @Before public void setUp() throws IOException { conf = new JobConf(); conf.setBoolean(JTConfig.JT_RETIREJOBS, false); conf.setBoolean(JTConfig.JT_PERSIST_JOBSTATUS, false); mr = new MiniMRCluster(1, "file:///", 3, null , null, conf); Path inFile = new Path(INPUT_FILE); fs = inFile.getFileSystem(mr.createJobConf()); clean(fs); buildExpectedJobOutput(); } /** * Kill the cluster after the test is done. */ @After public void tearDown() { if (fs != null) { clean(fs); } if (mr != null) { mr.shutdown(); } } // Updates expectedOutput to have the expected job output as a string void buildExpectedJobOutput() { if (expectedOutput == null) { expectedOutput = ""; for(int i = 1500; i >= 1; i--) { expectedOutput = expectedOutput.concat(Integer.toString(i) + " "); } expectedOutput = expectedOutput.trim(); } } // Create empty/nonempty input file. // Create script file with the specified content. protected void createInputAndScript(boolean isEmptyInput, String script) throws IOException { makeInput(fs, isEmptyInput ? "" : input); // create script file DataOutputStream file = fs.create(new Path(scriptFileName)); file.writeBytes(script); file.close(); } protected String[] genArgs(String jobtracker, String mapper, String reducer) { return new String[] { "-input", INPUT_FILE, "-output", OUTPUT_DIR, "-mapper", mapper, "-reducer", reducer, "-jobconf", MRJobConfig.NUM_MAPS + "=1", "-jobconf", MRJobConfig.NUM_REDUCES + "=1", "-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true", "-jobconf", "stream.tmpdir=" + new Path(TEST_ROOT_DIR).toUri().getPath(), "-jobconf", JTConfig.JT_IPC_ADDRESS + "="+jobtracker, "-jobconf", "fs.default.name=file:///", "-jobconf", "mapred.jar=" + TestStreaming.STREAMING_JAR, "-jobconf", "mapreduce.framework.name=yarn" }; } // create input file with the given content public void makeInput(FileSystem fs, String input) throws IOException { Path inFile = new Path(INPUT_FILE); DataOutputStream file = fs.create(inFile); file.writeBytes(input); file.close(); } // Delete output directory protected void deleteOutDir(FileSystem fs) { try { Path outDir = new Path(OUTPUT_DIR); fs.delete(outDir, true); } catch (Exception e) {} } // Delete input file, script file and output directory public void clean(FileSystem fs) { deleteOutDir(fs); try { Path file = new Path(INPUT_FILE); if (fs.exists(file)) { fs.delete(file, false); } file = new Path(scriptFile); if (fs.exists(file)) { fs.delete(file, false); } } catch (Exception e) { e.printStackTrace(); } } /** * Check if mapper/reducer with empty/nonempty input works properly if * reporting is done using lines like "reporter:status:" and * "reporter:counter:" before map()/reduce() method is called. * Validate the task's log of STDERR if messages are written * to stderr before map()/reduce() is called. * Also validate job output. 
  // create input file with the given content
  public void makeInput(FileSystem fs, String input) throws IOException {
    Path inFile = new Path(INPUT_FILE);
    DataOutputStream file = fs.create(inFile);
    file.writeBytes(input);
    file.close();
  }

  // Delete output directory
  protected void deleteOutDir(FileSystem fs) {
    try {
      Path outDir = new Path(OUTPUT_DIR);
      fs.delete(outDir, true);
    } catch (Exception e) {}
  }

  // Delete input file, script file and output directory
  public void clean(FileSystem fs) {
    deleteOutDir(fs);
    try {
      Path file = new Path(INPUT_FILE);
      if (fs.exists(file)) {
        fs.delete(file, false);
      }
      file = new Path(scriptFile);
      if (fs.exists(file)) {
        fs.delete(file, false);
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  /**
   * Check whether a mapper/reducer with empty/nonempty input works properly
   * when reporting is done using lines like "reporter:status:" and
   * "reporter:counter:" before the map()/reduce() method is called.
   * Validate the task's STDERR log when messages are written to stderr
   * before map()/reduce() is called.
   * Also validate the job output.
   *
   * @throws IOException
   */
  @Test
  public void testReporting() throws Exception {
    testStreamJob(false); // nonempty input
    testStreamJob(true);  // empty input
  }

  /**
   * Run a streaming job with the class's perl script as mapper and validate;
   * then run another streaming job with the script as reducer and validate.
   *
   * @param isEmptyInput Should the input to the script be empty?
   */
  private void testStreamJob(boolean isEmptyInput) throws IOException {
    createInputAndScript(isEmptyInput, script);

    // Check if streaming mapper works as expected
    map = scriptFileName;
    reduce = "/bin/cat";
    runStreamJob(TaskType.MAP, isEmptyInput);
    deleteOutDir(fs);

    // Check if streaming reducer works as expected.
    map = "/bin/cat";
    reduce = scriptFileName;
    runStreamJob(TaskType.REDUCE, isEmptyInput);
    clean(fs);
  }

  // Run a streaming job with the specified input file, mapper and reducer and
  // (1) validate that the job succeeds;
  // (2) validate that the user counter is incremented properly for the cases of
  //     (a) nonempty input to map,
  //     (b) empty input to map, and
  //     (c) nonempty input to reduce;
  // (3) validate the task status for the cases (2)(a), (2)(b) and (2)(c)
  //     (empty input to a reduce task means the reporter is a dummy that
  //     ignores all "reporter:status" and "reporter:counter" lines);
  // (4) validate the stderr of the task of the given task type;
  // (5) validate the job output.
  void runStreamJob(TaskType type, boolean isEmptyInput) throws IOException {
    boolean mayExit = false;

    StreamJob job = new StreamJob(genArgs(
        mr.createJobConf().get(JTConfig.JT_IPC_ADDRESS), map, reduce), mayExit);
    int returnValue = job.go();
    assertEquals(0, returnValue);

    // If the input to the reducer is empty, a dummy reporter (which ignores
    // all reporting lines) is set for MRErrorThread in waitOutputThreads().
    // So expectedCounterValue is 0 for the empty-input-to-reducer case.
    // The output of the reducer is also empty in that case.
    int expectedCounterValue = 0;
    if (type == TaskType.MAP || !isEmptyInput) {
      validateTaskStatus(job, type);
      // output is from the "print STDOUT" statements in the perl script
      validateJobOutput(job.getConf());
      expectedCounterValue = 2;
    }
    validateUserCounter(job, expectedCounterValue);
    validateTaskStderr(job, type);

    deleteOutDir(fs);
  }

  // Validate the status of the task of the given type
  // (validates the 1st task of that type).
  void validateTaskStatus(StreamJob job, TaskType type) throws IOException {
    // A map task has 2 phases: map, sort.
    // A reduce task has 3 phases: copy, sort, reduce.
    String finalPhaseInTask;
    TaskReport[] reports;
    if (type == TaskType.MAP) {
      reports = job.jc_.getMapTaskReports(job.jobId_);
      finalPhaseInTask = "sort";
    } else { // reduce task
      reports = job.jc_.getReduceTaskReports(job.jobId_);
      finalPhaseInTask = "reduce";
    }
    assertEquals(1, reports.length);
    assertEquals(expectedStatus + " > " + finalPhaseInTask,
        reports[0].getState());
  }

  // Validate the job output
  void validateJobOutput(Configuration conf) throws IOException {
    String output = MapReduceTestUtil.readOutput(
        new Path(OUTPUT_DIR), conf).trim();
    assertEquals(expectedOutput, output);
  }
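  // Note: the state string asserted in validateTaskStatus() combines the
  // status set by the script with the task's final phase; e.g. for a map
  // task it is expected to read:
  //   "before consuming input > sort"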
  // Validate stderr task log of the given task type (validates the 1st
  // task of that type).
  void validateTaskStderr(StreamJob job, TaskType type)
      throws IOException {
    TaskAttemptID attemptId =
        new TaskAttemptID(new TaskID(job.jobId_, type, 0), 0);

    String log = MapReduceTestUtil.readTaskLog(TaskLog.LogName.STDERR,
        attemptId, false);

    // trim() is called on expectedStderr here because the method
    // MapReduceTestUtil.readTaskLog() returns a trimmed String.
    assertEquals(expectedStderr.trim(), log);
  }

  // Validate if user counter is incremented properly
  void validateUserCounter(StreamJob job, int expectedCounterValue)
      throws IOException {
    Counters counters = job.running_.getCounters();
    assertEquals(expectedCounterValue, counters.findCounter(
        "myOwnCounterGroup", "myOwnCounter").getValue());
  }
}
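// To run just this test from the source tree (assuming the standard Maven
// surefire setup of the hadoop-streaming module), something like
//   mvn test -Dtest=TestStreamingStatus
// should work from the module's directory.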