package org.apache.hadoop.mapred; import java.util.ArrayList; import java.util.List; import java.util.Random; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.examples.SleepJob; import com.sun.el.parser.ParseException; public class SleepJobRunner { private static final Log LOG = LogFactory.getLog(SleepJobRunner.class); public static void printHelp(Options options) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("SleepJobRunner [options] numberOfJobs " + "percentOfSmallJobs percentOfShortJobs", options ); System.out.println(""); System.out.println("numberOfJobs\tthe number of jobs to launch"); System.out.println("percentOfSmallJobs\tpercentage of jobs to be small: " + "100 mappers, 1 reducer (default). The long jobs are the rest " + "with 5000 maps and 397 reducers (default)"); System.out.println("percentOfShortJobs\tpercentage of jobs to be short: " + "1 ms of wait time. The long jobas are the rest with " + "60 seconds of wait time in the mapper"); System.out.println("poolCount\tnumber of pools to spread the jobs over: " + "default of 15"); } /** * Helper class used to help return a set of values for calcStats() */ private static class Stats { public double mean; public double variance; public double stdDev; public Stats(double mean, double variance, double stdDev) { this.mean = mean; this.variance = variance; this.stdDev = stdDev; } } /** * Calculates mean, variance, standard deviation for a set of numbers * @param nums the list of numbers to compute stats on * @return aforementioned values in a Stats helper class */ private static Stats calcStats(List<Double> nums) { double sum = 0.0, mean = 0.0, variance = 0.0, stdDev = 0.0; for (Double d : nums) { sum += d.doubleValue(); } if (nums.size() > 0) { mean = sum / nums.size(); } sum = 0.0; for (Double d : nums) { sum += (d.doubleValue() - mean) * (d.doubleValue() - mean); } if (nums.size() > 0) { variance = sum / nums.size(); } stdDev = Math.sqrt(variance); return new Stats(mean, variance, stdDev); } @SuppressWarnings("static-access") public static void main(String[] args) throws Exception { // Parse the options int largeJobMappers = 5000, largeJobReducers = 397, poolCount = 15; int smallJobMappers = 10, smallJobReducers = 1; Option help = new Option( "help", "print this message" ); Option largeJobMappersOption = OptionBuilder.withArgName("size").hasArg() .withDescription("number of mappers for large jobs" ) .create("largeJobMappers"); Option largeJobReducersOption = OptionBuilder.withArgName("size").hasArg() .withDescription("number of reducers for large jobs") .create("largeJobReducers"); Option poolCountOption = OptionBuilder.withArgName("size").hasArg() .withDescription("number of pools to spread the load over") .create("poolCount"); Options options = new Options(); options.addOption(help); options.addOption(largeJobMappersOption); options.addOption(largeJobReducersOption); options.addOption(poolCountOption); CommandLineParser parser = new GnuParser(); CommandLine line = null; line = parser.parse(options, args); if (line.hasOption( "help" ) ) { printHelp(options); return; } if (line.hasOption("largeJobMappers")) { largeJobMappers = Integer.parseInt( line.getOptionValue("largeJobMappers")); } if (line.hasOption("largeJobReducers")) { largeJobReducers = Integer.parseInt( line.getOptionValue("largeJobReducers")); } if (line.hasOption("poolCount")) { poolCount = Integer.parseInt( line.getOptionValue("poolCount")); } String[] pools = new String[poolCount]; for (int i = 0; i < pools.length; i++) { pools[i] = "pool" + i; } if (line.getArgs().length != 3) { printHelp(options); return; } int jobs = Integer.valueOf(line.getArgs()[0]); int percentageSmall = Integer.valueOf(line.getArgs()[1]); int percentageShort = Integer.valueOf(line.getArgs()[2]); List<SleepJobRunnerThread> threads = new ArrayList<SleepJobRunnerThread>(); Random rand = new Random(); for (int i = 0; i < jobs; i++) { Configuration conf = new Configuration(); conf.set("mapred.child.java.opts", "-Xmx50m -Djava.net.preferIPv4Stack=true " + "-XX:+UseCompressedOops"); conf.set("io.sort.mb", "5"); conf.set("mapred.fairscheduler.pool", pools[i % pools.length]); int nMappers, nReducers, sleepTime; if (rand.nextInt(100) + 1 <= percentageSmall) { nMappers = smallJobMappers; nReducers = smallJobReducers; } else { nMappers = largeJobMappers; nReducers = largeJobReducers; } if (rand.nextInt(100) + 1 <= percentageShort) { sleepTime = 1; } else { sleepTime = 60000; } SleepJob sleepJob = new SleepJob(); sleepJob.setConf(conf); SleepJobRunnerThread t = new SleepJobRunnerThread(conf, nMappers, nReducers, sleepTime); threads.add(t); } long startTime = System.currentTimeMillis(); for (SleepJobRunnerThread t : threads) { t.start(); } for (SleepJobRunnerThread t : threads) { t.join(); } long endTime = System.currentTimeMillis(); // Compute stats List<Double> smallJobRuntimes = new ArrayList<Double>(); List<Double> largeJobRuntimes = new ArrayList<Double>(); for (SleepJobRunnerThread t : threads) { if (t.getNumMappers() == largeJobMappers && t.getNumReducers() == largeJobReducers) { largeJobRuntimes.add(Double.valueOf(t.getRuntime()/1000.0)); } else if (t.getNumMappers() == smallJobMappers && t.getNumReducers() == smallJobReducers) { smallJobRuntimes.add(Double.valueOf(t.getRuntime()/1000.0)); } else { throw new RuntimeException("Invalid mapper/reducer counts: " + t.getNumMappers() + ", " + t.getNumReducers()); } } List<Double> allJobRuntimes = new ArrayList<Double>(); allJobRuntimes.addAll(smallJobRuntimes); allJobRuntimes.addAll(largeJobRuntimes); Stats allStats = calcStats(allJobRuntimes); Stats largeStats = calcStats(largeJobRuntimes); Stats smallStats = calcStats(smallJobRuntimes); LOG.info(String.format("All jobs - mean: %.1f s std dev: %.1f s\n", allStats.mean, allStats.stdDev)); LOG.info(String.format("Large jobs - mean: %.1f s std dev: %.1f s\n", largeStats.mean, largeStats.stdDev)); LOG.info(String.format("Small jobs - mean: %.1f s std dev: %.1f s\n", smallStats.mean, smallStats.stdDev)); LOG.info(String.format("Total time - %.1f\n", (endTime - startTime)/1000.0)); } public static class SleepJobRunnerThread extends Thread { SleepJob jobToRun = null; int nMappers = 0; int nReducers = 0; int sleepTime = 0; long startTime = 0; long endTime = 0; public SleepJobRunnerThread(Configuration conf, int nMappers, int nReducers, int sleepTime) { super(); jobToRun = new SleepJob(); jobToRun.setConf(conf); this.nMappers = nMappers; this.nReducers = nReducers; this.sleepTime = sleepTime; } @Override public void run() { try { this.startTime = System.currentTimeMillis(); jobToRun.run(nMappers, nReducers, sleepTime, 10, sleepTime, 10, false, new ArrayList<String>(), new ArrayList<String>(), 10, 10, new ArrayList<String>(), 0, false, 0); } catch (Exception ex) { ex.printStackTrace(); } finally { this.endTime = System.currentTimeMillis(); } } /** * Returns the time it took to run this job in miliseconds */ public long getRuntime() { if (endTime == 0) { throw new RuntimeException("Can't get runtime - job didn't finish"); } return endTime - startTime; } public int getNumMappers() { return nMappers; } public int getNumReducers() { return nReducers; } } }