/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.File;
import java.io.FileInputStream;
import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.TimeoutException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.SortValidator.RecordStatsChecker.NonSplitableSequenceFileInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.util.StringUtils;

/**
 * Utilities used in unit test.
 */
public class UtilsForTests {

  static final Log LOG = LogFactory.getLog(UtilsForTests.class);

  final static long KB = 1024L * 1;
  final static long MB = 1024L * KB;
  final static long GB = 1024L * MB;
  final static long TB = 1024L * GB;
  final static long PB = 1024L * TB;
  final static Object waitLock = new Object();

  static DecimalFormat dfm = new DecimalFormat("####.000");
  static DecimalFormat ifm = new DecimalFormat("###,###,###,###,###");

  public static String dfmt(double d) {
    return dfm.format(d);
  }

  public static String ifmt(double d) {
    return ifm.format(d);
  }

  public static String formatBytes(long numBytes) {
    StringBuffer buf = new StringBuffer();
    boolean bDetails = true;
    double num = numBytes;

    if (numBytes < KB) {
      buf.append(numBytes + " B");
      bDetails = false;
    } else if (numBytes < MB) {
      buf.append(dfmt(num / KB) + " KB");
    } else if (numBytes < GB) {
      buf.append(dfmt(num / MB) + " MB");
    } else if (numBytes < TB) {
      buf.append(dfmt(num / GB) + " GB");
    } else if (numBytes < PB) {
      buf.append(dfmt(num / TB) + " TB");
    } else {
      buf.append(dfmt(num / PB) + " PB");
    }
    if (bDetails) {
      buf.append(" (" + ifmt(numBytes) + " bytes)");
    }
    return buf.toString();
  }

  public static String formatBytes2(long numBytes) {
    StringBuffer buf = new StringBuffer();
    long u = 0;
    if (numBytes >= TB) { u = numBytes / TB; numBytes -= u * TB; buf.append(u + " TB "); }
    if (numBytes >= GB) { u = numBytes / GB; numBytes -= u * GB; buf.append(u + " GB "); }
    if (numBytes >= MB) { u = numBytes / MB; numBytes -= u * MB; buf.append(u + " MB "); }
    if (numBytes >= KB) { u = numBytes / KB; numBytes -= u * KB; buf.append(u + " KB "); }
    buf.append(numBytes + " B"); // the remaining bytes, even if zero
    return buf.toString();
  }

  static final String regexpSpecials = "[]()?*+|.!^-\\~@";

  public static String regexpEscape(String plain) {
    StringBuffer buf = new StringBuffer();
    char[] ch = plain.toCharArray();
    int csup = ch.length;
    for (int c = 0; c < csup; c++) {
      if (regexpSpecials.indexOf(ch[c]) != -1) {
        buf.append("\\");
      }
      buf.append(ch[c]);
    }
    return buf.toString();
  }

  public static String safeGetCanonicalPath(File f) {
    try {
      String s = f.getCanonicalPath();
      return (s == null) ? f.toString() : s;
    } catch (IOException io) {
      return f.toString();
    }
  }
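
  /**
   * Illustrative sketch only (not referenced by any test): shows the two
   * byte-formatting helpers above side by side. The sample size below is an
   * arbitrary, hypothetical value chosen to exercise both output styles.
   */
  private static void demoByteFormatting() {
    long sample = 3 * MB + 256 * KB;            // hypothetical sample size
    // decimal style with an exact-byte suffix, e.g. "3.250 MB (3,407,872 bytes)"
    System.out.println(formatBytes(sample));
    // unit-breakdown style, e.g. "3 MB 256 KB 0 B"
    System.out.println(formatBytes2(sample));
  }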
  public static String slurp(File f) throws IOException {
    int len = (int) f.length();
    byte[] buf = new byte[len];
    FileInputStream in = new FileInputStream(f);
    String contents = null;
    try {
      in.read(buf, 0, len);
      contents = new String(buf, "UTF-8");
    } finally {
      in.close();
    }
    return contents;
  }

  public static String slurpHadoop(Path p, FileSystem fs) throws IOException {
    int len = (int) fs.getFileStatus(p).getLen();
    byte[] buf = new byte[len];
    InputStream in = fs.open(p);
    String contents = null;
    try {
      in.read(buf, 0, len);
      contents = new String(buf, "UTF-8");
    } finally {
      in.close();
    }
    return contents;
  }

  public static String rjustify(String s, int width) {
    if (s == null) s = "null";
    if (width > s.length()) {
      s = getSpace(width - s.length()) + s;
    }
    return s;
  }

  public static String ljustify(String s, int width) {
    if (s == null) s = "null";
    if (width > s.length()) {
      s = s + getSpace(width - s.length());
    }
    return s;
  }

  static char[] space;
  static {
    space = new char[300];
    Arrays.fill(space, '\u0020');
  }

  public static String getSpace(int len) {
    if (len > space.length) {
      space = new char[Math.max(len, 2 * space.length)];
      Arrays.fill(space, '\u0020');
    }
    return new String(space, 0, len);
  }

  /**
   * Gets job status from the jobtracker given the jobclient and the job id
   */
  static JobStatus getJobStatus(JobClient jc, JobID id) throws IOException {
    JobStatus[] statuses = jc.getAllJobs();
    for (JobStatus jobStatus : statuses) {
      if (jobStatus.getJobID().equals(id)) {
        return jobStatus;
      }
    }
    return null;
  }

  /**
   * A utility that waits for specified amount of time
   */
  public static void waitFor(long duration) {
    try {
      synchronized (waitLock) {
        waitLock.wait(duration);
      }
    } catch (InterruptedException ie) {}
  }

  /**
   * Wait for the jobtracker to be RUNNING.
   */
  static void waitForJobTracker(JobClient jobClient) {
    while (true) {
      try {
        ClusterStatus status = jobClient.getClusterStatus();
        while (status.getJobTrackerStatus() != JobTrackerStatus.RUNNING) {
          waitFor(100);
          status = jobClient.getClusterStatus();
        }
        break; // means that the jt is ready
      } catch (IOException ioe) {}
    }
  }
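
  /**
   * Illustrative sketch only (not referenced by any test): a typical polling
   * sequence built from the wait helpers in this class. The JobConf and JobID
   * arguments are hypothetical placeholders supplied by the caller.
   */
  private static void demoWaitHelpers(JobConf conf, JobID id) throws IOException {
    JobClient client = new JobClient(conf);
    waitForJobTracker(client);                    // block until the jobtracker is RUNNING
    waitTillDone(client);                         // block until all submitted jobs finish
    JobStatus status = getJobStatus(client, id);  // null if the job is unknown
    if (status != null) {
      LOG.info("Run state of " + id + ": " + status.getRunState());
    }
  }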
  /**
   * Waits until all the jobs at the jobtracker complete.
   */
  static void waitTillDone(JobClient jobClient) throws IOException {
    // Wait for the last job to complete
    while (true) {
      boolean shouldWait = false;
      for (JobStatus jobStatuses : jobClient.getAllJobs()) {
        if (jobStatuses.getRunState() != JobStatus.SUCCEEDED
            && jobStatuses.getRunState() != JobStatus.FAILED
            && jobStatuses.getRunState() != JobStatus.KILLED) {
          shouldWait = true;
          break;
        }
      }
      if (shouldWait) {
        waitFor(100);
      } else {
        break;
      }
    }
  }

  /**
   * Configure a waiting job
   */
  static void configureWaitingJobConf(JobConf jobConf, Path inDir,
                                      Path outputPath, int numMaps, int numRed,
                                      String jobName, String mapSignalFilename,
                                      String redSignalFilename) throws IOException {
    jobConf.setJobName(jobName);
    jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outputPath);
    jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);
    jobConf.setOutputKeyClass(BytesWritable.class);
    jobConf.setOutputValueClass(BytesWritable.class);
    jobConf.setInputFormat(RandomInputFormat.class);
    jobConf.setNumMapTasks(numMaps);
    jobConf.setNumReduceTasks(numRed);
    jobConf.setJar("build/test/mapred/testjar/testjob.jar");
    jobConf.set(getTaskSignalParameter(true), mapSignalFilename);
    jobConf.set(getTaskSignalParameter(false), redSignalFilename);
  }

  /**
   * Commonly used map and reduce classes
   */

  /**
   * Map is a Mapper that just waits for a file to be created on the dfs. The
   * file creation is a signal to the mappers and hence acts as a waiting job.
   */
  static class WaitingMapper
      extends MapReduceBase
      implements Mapper<WritableComparable, Writable, WritableComparable, Writable> {

    FileSystem fs = null;
    Path signal;
    int id = 0;
    int totalMaps = 0;

    /**
     * Checks if the map task needs to wait. By default all the maps will wait.
     * This method needs to be overridden to make a custom waiting mapper.
     */
    public boolean shouldWait(int id) {
      return true;
    }

    /**
     * Returns a signal file on which the map task should wait. By default all
     * the maps wait on a single file passed as test.mapred.map.waiting.target.
     * This method needs to be overridden to make a custom waiting mapper.
     */
    public Path getSignalFile(int id) {
      return signal;
    }

    /** The waiting function. The map exits once it gets a signal. Here the
     * signal is the file existence.
     */
    public void map(WritableComparable key, Writable val,
                    OutputCollector<WritableComparable, Writable> output,
                    Reporter reporter) throws IOException {
      if (shouldWait(id)) {
        if (fs != null) {
          while (!fs.exists(getSignalFile(id))) {
            try {
              reporter.progress();
              synchronized (this) {
                this.wait(1000); // wait for 1 sec
              }
            } catch (InterruptedException ie) {
              System.out.println("Interrupted while the map was waiting for"
                                 + " the signal.");
              break;
            }
          }
        } else {
          throw new IOException("Could not get the DFS!!");
        }
      }
    }

    public void configure(JobConf conf) {
      try {
        String taskId = conf.get(JobContext.TASK_ATTEMPT_ID);
        id = Integer.parseInt(taskId.split("_")[4]);
        totalMaps = Integer.parseInt(conf.get(JobContext.NUM_MAPS));
        fs = FileSystem.get(conf);
        signal = new Path(conf.get(getTaskSignalParameter(true)));
      } catch (IOException ioe) {
        System.out.println("Got an exception while obtaining the filesystem");
      }
    }
  }
  /** Only the later half of the maps wait for the signal while the rest
   * complete immediately.
   */
  static class HalfWaitingMapper extends WaitingMapper {
    @Override
    public boolean shouldWait(int id) {
      return id >= (totalMaps / 2);
    }
  }

  /**
   * Reduce that just waits for a file to be created on the dfs. The
   * file creation is a signal to the reduce.
   */
  static class WaitingReducer extends MapReduceBase
      implements Reducer<WritableComparable, Writable, WritableComparable, Writable> {

    FileSystem fs = null;
    Path signal;

    /** The waiting function. The reduce exits once it gets a signal. Here the
     * signal is the file existence.
     */
    public void reduce(WritableComparable key, Iterator<Writable> val,
                       OutputCollector<WritableComparable, Writable> output,
                       Reporter reporter) throws IOException {
      if (fs != null) {
        while (!fs.exists(signal)) {
          try {
            reporter.progress();
            synchronized (this) {
              this.wait(1000); // wait for 1 sec
            }
          } catch (InterruptedException ie) {
            System.out.println("Interrupted while the reduce was waiting for"
                               + " the signal.");
            break;
          }
        }
      } else {
        throw new IOException("Could not get the DFS!!");
      }
    }

    public void configure(JobConf conf) {
      try {
        fs = FileSystem.get(conf);
        signal = new Path(conf.get(getTaskSignalParameter(false)));
      } catch (IOException ioe) {
        System.out.println("Got an exception while obtaining the filesystem");
      }
    }
  }

  static String getTaskSignalParameter(boolean isMap) {
    return isMap
           ? "test.mapred.map.waiting.target"
           : "test.mapred.reduce.waiting.target";
  }

  /**
   * Signal the maps/reduces to start.
   */
  static void signalTasks(MiniDFSCluster dfs, FileSystem fileSys,
                          String mapSignalFile, String reduceSignalFile,
                          int replication)
      throws IOException, TimeoutException {
    try {
      writeFile(dfs.getNameNode(), fileSys.getConf(), new Path(mapSignalFile),
                (short) replication);
      writeFile(dfs.getNameNode(), fileSys.getConf(),
                new Path(reduceSignalFile), (short) replication);
    } catch (InterruptedException ie) {
      // Ignore
    }
  }

  /**
   * Signal the maps/reduces to start.
   */
  static void signalTasks(MiniDFSCluster dfs, FileSystem fileSys,
                          boolean isMap, String mapSignalFile,
                          String reduceSignalFile)
      throws IOException, TimeoutException {
    try {
      // signal the maps to complete
      writeFile(dfs.getNameNode(), fileSys.getConf(),
                isMap ? new Path(mapSignalFile) : new Path(reduceSignalFile),
                (short) 1);
    } catch (InterruptedException ie) {
      // Ignore
    }
  }

  static String getSignalFile(Path dir) {
    return (new Path(dir, "signal")).toString();
  }

  static String getMapSignalFile(Path dir) {
    return (new Path(dir, "map-signal")).toString();
  }

  static String getReduceSignalFile(Path dir) {
    return (new Path(dir, "reduce-signal")).toString();
  }

  static void writeFile(NameNode namenode, Configuration conf, Path name,
                        short replication)
      throws IOException, TimeoutException, InterruptedException {
    FileSystem fileSys = FileSystem.get(conf);
    SequenceFile.Writer writer =
      SequenceFile.createWriter(fileSys, conf, name,
                                BytesWritable.class, BytesWritable.class,
                                CompressionType.NONE);
    writer.append(new BytesWritable(), new BytesWritable());
    writer.close();
    fileSys.setReplication(name, replication);
    DFSTestUtil.waitReplication(fileSys, name, replication);
  }
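
  /**
   * Illustrative sketch only (not referenced by any test): how the waiting-job
   * helpers above are typically combined. The cluster handles, JobConf and
   * directory layout are hypothetical; 2 maps, 1 reduce and a replication
   * factor of 1 are arbitrary choices.
   */
  private static RunningJob demoWaitingJob(MiniDFSCluster dfs, FileSystem fileSys,
                                           JobConf conf, Path inDir, Path shareDir)
      throws IOException, TimeoutException {
    // Configure a job whose maps/reduces block until the signal files exist.
    configureWaitingJobConf(conf, inDir, new Path(shareDir, "output"), 2, 1,
                            "demo-waiting-job", getMapSignalFile(shareDir),
                            getReduceSignalFile(shareDir));
    RunningJob job = new JobClient(conf).submitJob(conf);
    // ... once the test has observed whatever it needed, create the signal
    // files in DFS so the blocked tasks can proceed and the job can finish.
    signalTasks(dfs, fileSys, getMapSignalFile(shareDir),
                getReduceSignalFile(shareDir), 1);
    return job;
  }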
  // Input formats
  /**
   * A custom input format that creates virtual inputs of a single string
   * for each map.
   */
  public static class RandomInputFormat implements InputFormat<Text, Text> {

    public InputSplit[] getSplits(JobConf job, int numSplits)
        throws IOException {
      InputSplit[] result = new InputSplit[numSplits];
      Path outDir = FileOutputFormat.getOutputPath(job);
      for (int i = 0; i < result.length; ++i) {
        result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1,
                                  (String[]) null);
      }
      return result;
    }

    static class RandomRecordReader implements RecordReader<Text, Text> {
      Path name;

      public RandomRecordReader(Path p) {
        name = p;
      }

      public boolean next(Text key, Text value) {
        if (name != null) {
          key.set(name.getName());
          name = null;
          return true;
        }
        return false;
      }

      public Text createKey() {
        return new Text();
      }

      public Text createValue() {
        return new Text();
      }

      public long getPos() {
        return 0;
      }

      public void close() {}

      public float getProgress() {
        return 0.0f;
      }
    }

    public RecordReader<Text, Text> getRecordReader(InputSplit split,
                                                    JobConf job,
                                                    Reporter reporter)
        throws IOException {
      return new RandomRecordReader(((FileSplit) split).getPath());
    }
  }

  // Start a job and return its RunningJob object
  static RunningJob runJob(JobConf conf, Path inDir, Path outDir)
      throws IOException {
    return runJob(conf, inDir, outDir, conf.getNumMapTasks(),
                  conf.getNumReduceTasks());
  }

  // Start a job and return its RunningJob object
  static RunningJob runJob(JobConf conf, Path inDir, Path outDir, int numMaps,
                           int numReds) throws IOException {

    String input = "The quick brown fox\n" + "has many silly\n"
                   + "red fox sox\n";

    // submit the job and wait for it to complete
    return runJob(conf, inDir, outDir, numMaps, numReds, input);
  }

  // Start a job with the specified input and return its RunningJob object
  static RunningJob runJob(JobConf conf, Path inDir, Path outDir, int numMaps,
                           int numReds, String input) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
      fs.delete(outDir, true);
    }
    if (!fs.exists(inDir)) {
      fs.mkdirs(inDir);
    }
    for (int i = 0; i < numMaps; ++i) {
      DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
      file.writeBytes(input);
      file.close();
    }

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.setNumMapTasks(numMaps);
    conf.setNumReduceTasks(numReds);

    JobClient jobClient = new JobClient(conf);
    RunningJob job = jobClient.submitJob(conf);

    return job;
  }

  // Run a job that will succeed and wait until it completes
  public static RunningJob runJobSucceed(JobConf conf, Path inDir, Path outDir)
      throws IOException {
    conf.setJobName("test-job-succeed");
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
    long sleepCount = 0;
    while (!job.isComplete()) {
      try {
        if (sleepCount > 300) { // 30 seconds
          throw new IOException("Job didn't finish in 30 seconds");
        }
        Thread.sleep(100);
        sleepCount++;
      } catch (InterruptedException e) {
        break;
      }
    }

    return job;
  }
  // Run a job that will fail and wait until it completes
  public static RunningJob runJobFail(JobConf conf, Path inDir, Path outDir)
      throws IOException {
    conf.setJobName("test-job-fail");
    conf.setMapperClass(FailMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setMaxMapAttempts(1);

    RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
    long sleepCount = 0;
    while (!job.isComplete()) {
      try {
        if (sleepCount > 300) { // 30 seconds
          throw new IOException("Job didn't finish in 30 seconds");
        }
        Thread.sleep(100);
        sleepCount++;
      } catch (InterruptedException e) {
        break;
      }
    }

    return job;
  }

  // Run a job that will be killed and wait until it completes
  public static RunningJob runJobKill(JobConf conf, Path inDir, Path outDir)
      throws IOException {
    conf.setJobName("test-job-kill");
    conf.setMapperClass(KillMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
    long sleepCount = 0;
    while (job.getJobState() != JobStatus.RUNNING) {
      try {
        if (sleepCount > 300) { // 30 seconds
          throw new IOException("Job didn't start in 30 seconds");
        }
        Thread.sleep(100);
        sleepCount++;
      } catch (InterruptedException e) {
        break;
      }
    }
    job.killJob();
    sleepCount = 0;
    while (job.cleanupProgress() == 0.0f) {
      try {
        if (sleepCount > 2000) { // 20 seconds
          throw new IOException("Job cleanup didn't start in 20 seconds");
        }
        Thread.sleep(10);
        sleepCount++;
      } catch (InterruptedException ie) {
        break;
      }
    }

    return job;
  }

  /**
   * Cleans up files/dirs inline. CleanupQueue deletes in a separate thread
   * asynchronously.
   */
  public static class InlineCleanupQueue extends CleanupQueue {
    List<String> stalePaths = new ArrayList<String>();

    public InlineCleanupQueue() {
      // do nothing
    }

    @Override
    public void addToQueue(PathDeletionContext... contexts) {
      // delete paths in-line
      for (PathDeletionContext context : contexts) {
        try {
          if (!deletePath(context)) {
            LOG.warn("Stale path " + context.fullPath);
            stalePaths.add(context.fullPath);
          }
        } catch (IOException e) {
          LOG.warn("Caught exception while deleting path " + context.fullPath);
          LOG.info(StringUtils.stringifyException(e));
          stalePaths.add(context.fullPath);
        }
      }
    }
  }

  static class FakeClock extends Clock {
    long time = 0;

    public void advance(long millis) {
      time += millis;
    }

    @Override
    long getTime() {
      return time;
    }
  }

  // Mapper that fails
  static class FailMapper extends MapReduceBase
      implements Mapper<WritableComparable, Writable, WritableComparable, Writable> {

    public void map(WritableComparable key, Writable value,
                    OutputCollector<WritableComparable, Writable> out,
                    Reporter reporter) throws IOException {
      // NOTE- the next line is required for the TestDebugScript test to succeed
      System.err.println("failing map");
      throw new RuntimeException("failing map");
    }
  }
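
  /**
   * Illustrative sketch only (not referenced by any test): the canned runners
   * above drive one job into each terminal state. The input/output paths are
   * hypothetical; each call gets its own JobConf copy and output subdirectory.
   */
  private static void demoCannedJobs(JobConf conf, Path inDir, Path outDir)
      throws IOException {
    RunningJob succeeded = runJobSucceed(new JobConf(conf), inDir, new Path(outDir, "ok"));
    RunningJob failed = runJobFail(new JobConf(conf), inDir, new Path(outDir, "fail"));
    RunningJob killed = runJobKill(new JobConf(conf), inDir, new Path(outDir, "kill"));
    LOG.info("Job states: " + succeeded.getJobState() + ", "
             + failed.getJobState() + ", " + killed.getJobState());
  }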
  // Mapper that sleeps for a long time.
  // Used for running a job that will be killed
  static class KillMapper extends MapReduceBase
      implements Mapper<WritableComparable, Writable, WritableComparable, Writable> {

    public void map(WritableComparable key, Writable value,
                    OutputCollector<WritableComparable, Writable> out,
                    Reporter reporter) throws IOException {
      try {
        Thread.sleep(1000000);
      } catch (InterruptedException e) {
        // Do nothing
      }
    }
  }

  static void setUpConfigFile(Properties confProps, File configFile)
      throws IOException {
    Configuration config = new Configuration(false);
    FileOutputStream fos = new FileOutputStream(configFile);

    for (Enumeration<?> e = confProps.propertyNames(); e.hasMoreElements();) {
      String key = (String) e.nextElement();
      config.set(key, confProps.getProperty(key));
    }

    config.writeXml(fos);
    fos.close();
  }

  /**
   * This creates a file in the dfs
   * @param dfs FileSystem in which the file has to be created
   * @param URIPATH Path dfs path at which the file has to be created
   * @param permission FsPermission file permission
   * @param input String contents written to the file
   * @return the DataOutputStream of the created file (already closed)
   */
  public static DataOutputStream createTmpFileDFS(FileSystem dfs, Path URIPATH,
      FsPermission permission, String input) throws Exception {
    // Creating the path with the file
    DataOutputStream file = FileSystem.create(dfs, URIPATH, permission);
    file.writeBytes(input);
    file.close();
    return file;
  }

  /**
   * This formats the long tasktracker name to just the FQDN
   * @param taskTrackerLong String The long format of the tasktracker string
   * @return String The FQDN of the tasktracker
   * @throws Exception
   */
  public static String getFQDNofTT(String taskTrackerLong) throws Exception {
    // Getting the exact FQDN of the tasktracker from the tasktracker string.
    String[] firstSplit = taskTrackerLong.split("_");
    String tmpOutput = firstSplit[1];
    String[] secondSplit = tmpOutput.split(":");
    String tmpTaskTracker = secondSplit[0];
    return tmpTaskTracker;
  }
}