package hip.ch4; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import hip.util.Cli; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import java.io.*; import java.util.List; import java.util.Map; /** * Run the compression benchmarks used to generate the data in the book. * * Usage: * * <pre> * hip hip.ch6.CompressionIOBenchmark --text-file test.txt --work-dir /tmp/hip-compress --runs 10 * </pre> */ public class CompressionIOBenchmark extends Configured implements Tool { enum Codec { DEFLATE(org.apache.hadoop.io.compress.DeflateCodec.class), GZIP(org.apache.hadoop.io.compress.GzipCodec.class), BZIP2_JAVA(org.apache.hadoop.io.compress.BZip2Codec.class, ImmutableMap.of("io.compression.codec.bzip2.library", "java-builtin")), BZIP2_NATIVE(org.apache.hadoop.io.compress.BZip2Codec.class, ImmutableMap.of("io.compression.codec.bzip2.library", "system-native")), LZO(com.hadoop.compression.lzo.LzoCodec.class), LZOP(com.hadoop.compression.lzo.LzopCodec.class), LZ4(org.apache.hadoop.io.compress.Lz4Codec.class), SNAPPY(org.apache.hadoop.io.compress.SnappyCodec.class); private final Class<? extends CompressionCodec> codec; private final Map<String, String> props; Codec(Class<? extends CompressionCodec> codec) { this(codec, null); } Codec(Class<? extends CompressionCodec> codec, Map<String, String> props) { this.codec = codec; this.props = props; } public Class<? extends CompressionCodec> getCodec() { return codec; } public Map<String, String> getProps() { return props; } public void updateConfiguration(Configuration conf) { if (props != null) { for(Map.Entry<String, String> entry: props.entrySet()) { conf.set(entry.getKey(), entry.getValue()); } } } } public enum Opts implements Cli.ArgGetter { TEXT_FILE(Cli.ArgBuilder.builder().hasArgument(true).required(true).description("Text file to run tests on.")), BINARY_FILE(Cli.ArgBuilder.builder().hasArgument(true).required(false).description("Binary file to run tests on.")), WORK_DIR(Cli.ArgBuilder.builder().hasArgument(true).required(true).description("Work directory")), CODECS(Cli.ArgBuilder.builder().hasArgument(true).required(false).description("A csv-list of codec names to test")), RUNS(Cli.ArgBuilder.builder().hasArgument(true).required(true).description("Number of iterations")); private final Cli.ArgInfo argInfo; Opts(final Cli.ArgBuilder builder) { this.argInfo = builder.setArgName(name()).build(); } @Override public Cli.ArgInfo getArgInfo() { return argInfo; } } /** * Main entry point for the example. * * @param args arguments * @throws Exception when something goes wrong */ public static void main(final String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new CompressionIOBenchmark(), args); System.exit(res); } /** * The MapReduce driver - setup and launch the job. * * @param args the command-line arguments * @return the process exit code * @throws Exception if something goes wrong */ public int run(final String[] args) throws Exception { Cli cli = Cli.builder().setArgs(args).addOptions(Opts.values()).build(); int result = cli.runCmd(); if (result != 0) { return result; } File textFile = new File(cli.getArgValueAsString(Opts.TEXT_FILE)); // File binaryFile = new File(cli.getArgValueAsString(Opts.BINARY_FILE)); File workDir = new File(cli.getArgValueAsString(Opts.WORK_DIR)); int runs = cli.getArgValueAsInt(Opts.RUNS); String codecs = cli.getArgValueAsString(Opts.CODECS); Codec[] codecsToRun = Codec.values(); if (StringUtils.isNotBlank(codecs)) { List<Codec> userCodecs = Lists.newArrayList(); for (String codecName: StringUtils.split(codecs, ",")) { userCodecs.add(Codec.valueOf(codecName)); } codecsToRun = userCodecs.toArray(new Codec[userCodecs.size()]); } if (!textFile.isFile()) { throw new IOException("Missing file: " + textFile.getAbsolutePath()); } Configuration conf = super.getConf(); dumpHeader(); for (Codec codec : codecsToRun) { test(conf, codec, textFile, workDir, false, runs, false); } return 0; } public static void test(Configuration orig, Codec codec, File srcFile, File workDir, boolean binary, int runs, boolean trial) throws ClassNotFoundException, IllegalAccessException, InstantiationException, IOException { FileUtils.deleteQuietly(workDir); FileUtils.forceMkdir(workDir); File destFile = new File(workDir, "compressed"); File uncompressedDestFile = new File(workDir, "uncompressed"); Configuration newConf = new Configuration(orig); CompressionCodec compressionCodec = ReflectionUtils.newInstance(codec.getCodec(), newConf); codec.updateConfiguration(newConf); int accumulatedCompressMillis = 0; int accumulatedDecompressMillis = 0; for(int i=0; i < runs; i++) { System.err.println(codec.name() + " run " + (i+1) + "/" + runs); long start = System.currentTimeMillis(); compress(srcFile, destFile, compressionCodec); accumulatedCompressMillis += System.currentTimeMillis() - start; start = System.currentTimeMillis(); decompress(destFile, uncompressedDestFile, compressionCodec); accumulatedDecompressMillis += System.currentTimeMillis() - start; } if(!trial) { dumpStats(codec, runs, binary, accumulatedCompressMillis / runs, accumulatedDecompressMillis / runs, destFile.length(), srcFile.length()); } } public static void dumpHeader() { System.out.printf("%-50s %5s %8s %12s %12s %12s %12s %11s\n", "codec", "runs", "type", "comp time", "decomp time", "orig size", "comp size", "comp per"); } public static void dumpStats( Codec codec, int runs, boolean binaryFile, long compressionMillis, long decompressionMillis, long compressedFileSize, long originalFileSize) { System.out.printf("%-50s %5d %8s %12d %12d %12d %12d %10.2f\n", codec.name(), runs, binaryFile ? "binary":"ascii", compressionMillis, decompressionMillis, originalFileSize, compressedFileSize, 100.0 - (double) compressedFileSize * 100 / (double) originalFileSize ); } public static void compress(File src, File dest, CompressionCodec codec) throws IOException { InputStream is = null; OutputStream os = null; try { is = new FileInputStream(src); os = codec.createOutputStream(new FileOutputStream(dest), codec.createCompressor()); IOUtils.copy(is, os); } finally { IOUtils.closeQuietly(os); IOUtils.closeQuietly(is); } } public static void decompress(File src, File dest, CompressionCodec codec) throws IOException { InputStream is = null; OutputStream os = null; try { is = codec.createInputStream(new FileInputStream(src), codec.createDecompressor()); os = new FileOutputStream(dest); IOUtils.copy(is, os); } finally { IOUtils.closeQuietly(os); IOUtils.closeQuietly(is); } } }