/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package org.apache.hadoop.hbase.io.hfile; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Random; import java.util.StringTokenizer; import junit.framework.TestCase; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RawLocalFileSystem; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.io.hfile.HFile.Reader; import org.apache.hadoop.hbase.io.hfile.HFile.Writer; import org.apache.hadoop.io.BytesWritable; import org.junit.experimental.categories.Category; import org.mortbay.log.Log; /** * test the performance for seek. * <p> * Copied from * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>. * Remove after tfile is committed and use the tfile version of this class * instead.</p> */ @Category(MediumTests.class) public class TestHFileSeek extends TestCase { private static final boolean USE_PREAD = true; private MyOptions options; private Configuration conf; private Path path; private FileSystem fs; private NanoTimer timer; private Random rng; private RandomDistribution.DiscreteRNG keyLenGen; private KVGenerator kvGen; private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); @Override public void setUp() throws IOException { if (options == null) { options = new MyOptions(new String[0]); } conf = new Configuration(); if (options.useRawFs) { conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class); } conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize); conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize); path = new Path(new Path(options.rootDir), options.file); fs = path.getFileSystem(conf); timer = new NanoTimer(false); rng = new Random(options.seed); keyLenGen = new RandomDistribution.Zipf(new Random(rng.nextLong()), options.minKeyLen, options.maxKeyLen, 1.2); RandomDistribution.DiscreteRNG valLenGen = new RandomDistribution.Flat(new Random(rng.nextLong()), options.minValLength, options.maxValLength); RandomDistribution.DiscreteRNG wordLenGen = new RandomDistribution.Flat(new Random(rng.nextLong()), options.minWordLen, options.maxWordLen); kvGen = new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen, options.dictSize); } @Override public void tearDown() { try { fs.close(); } catch (Exception e) { // Nothing } } private static FSDataOutputStream createFSOutput(Path name, FileSystem fs) throws IOException { if (fs.exists(name)) { fs.delete(name, true); } FSDataOutputStream fout = fs.create(name); return fout; } private void createTFile() throws IOException { long totalBytes = 0; FSDataOutputStream fout = createFSOutput(path, fs); try { Writer writer = HFile.getWriterFactoryNoCache(conf) .withOutputStream(fout) .withBlockSize(options.minBlockSize) .withCompression(options.compress) .create(); try { BytesWritable key = new BytesWritable(); BytesWritable val = new BytesWritable(); timer.start(); for (long i = 0; true; ++i) { if (i % 1000 == 0) { // test the size for every 1000 rows. if (fs.getFileStatus(path).getLen() >= options.fileSize) { break; } } kvGen.next(key, val, false); byte [] k = new byte [key.getLength()]; System.arraycopy(key.getBytes(), 0, k, 0, key.getLength()); byte [] v = new byte [val.getLength()]; System.arraycopy(val.getBytes(), 0, v, 0, key.getLength()); writer.append(k, v); totalBytes += key.getLength(); totalBytes += val.getLength(); } timer.stop(); } finally { writer.close(); } } finally { fout.close(); } double duration = (double)timer.read()/1000; // in us. long fsize = fs.getFileStatus(path).getLen(); System.out.printf( "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n", timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes / duration); System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n", timer.toString(), (double) fsize / 1024 / 1024, fsize / duration); } public void seekTFile() throws IOException { int miss = 0; long totalBytes = 0; FSDataInputStream fsdis = fs.open(path); Reader reader = HFile.createReaderFromStream(path, fsdis, fs.getFileStatus(path).getLen(), new CacheConfig(conf)); reader.loadFileInfo(); KeySampler kSampler = new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(), keyLenGen); HFileScanner scanner = reader.getScanner(false, USE_PREAD); BytesWritable key = new BytesWritable(); timer.reset(); timer.start(); for (int i = 0; i < options.seekCount; ++i) { kSampler.next(key); byte [] k = new byte [key.getLength()]; System.arraycopy(key.getBytes(), 0, k, 0, key.getLength()); if (scanner.seekTo(k) >= 0) { ByteBuffer bbkey = scanner.getKey(); ByteBuffer bbval = scanner.getValue(); totalBytes += bbkey.limit(); totalBytes += bbval.limit(); } else { ++miss; } } timer.stop(); System.out.printf( "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n", timer.toString(), NanoTimer.nanoTimeToString(timer.read() / options.seekCount), options.seekCount - miss, miss, (double) totalBytes / 1024 / (options.seekCount - miss)); } public void testSeeks() throws IOException { if (options.doCreate()) { createTFile(); } if (options.doRead()) { seekTFile(); } if (options.doCreate()) { fs.delete(path, true); } } private static class IntegerRange { private final int from, to; public IntegerRange(int from, int to) { this.from = from; this.to = to; } public static IntegerRange parse(String s) throws ParseException { StringTokenizer st = new StringTokenizer(s, " \t,"); if (st.countTokens() != 2) { throw new ParseException("Bad integer specification: " + s); } int from = Integer.parseInt(st.nextToken()); int to = Integer.parseInt(st.nextToken()); return new IntegerRange(from, to); } public int from() { return from; } public int to() { return to; } } private static class MyOptions { // hard coded constants int dictSize = 1000; int minWordLen = 5; int maxWordLen = 20; private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); String rootDir = TEST_UTIL.getDataTestDir("TestTFileSeek").toString(); String file = "TestTFileSeek"; // String compress = "lzo"; DISABLED String compress = "none"; int minKeyLen = 10; int maxKeyLen = 50; int minValLength = 1024; int maxValLength = 2 * 1024; int minBlockSize = 1 * 1024 * 1024; int fsOutputBufferSize = 1; int fsInputBufferSize = 0; // Default writing 10MB. long fileSize = 10 * 1024 * 1024; long seekCount = 1000; long trialCount = 1; long seed; boolean useRawFs = false; static final int OP_CREATE = 1; static final int OP_READ = 2; int op = OP_CREATE | OP_READ; boolean proceed = false; public MyOptions(String[] args) { seed = System.nanoTime(); try { Options opts = buildOptions(); CommandLineParser parser = new GnuParser(); CommandLine line = parser.parse(opts, args, true); processOptions(line, opts); validateOptions(); } catch (ParseException e) { System.out.println(e.getMessage()); System.out.println("Try \"--help\" option for details."); setStopProceed(); } } public boolean proceed() { return proceed; } private Options buildOptions() { Option compress = OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz|snappy]") .hasArg().withDescription("compression scheme").create('c'); Option fileSize = OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB") .hasArg().withDescription("target size of the file (in MB).") .create('s'); Option fsInputBufferSz = OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size") .hasArg().withDescription( "size of the file system input buffer (in bytes).").create( 'i'); Option fsOutputBufferSize = OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size") .hasArg().withDescription( "size of the file system output buffer (in bytes).").create( 'o'); Option keyLen = OptionBuilder .withLongOpt("key-length") .withArgName("min,max") .hasArg() .withDescription( "the length range of the key (in bytes)") .create('k'); Option valueLen = OptionBuilder .withLongOpt("value-length") .withArgName("min,max") .hasArg() .withDescription( "the length range of the value (in bytes)") .create('v'); Option blockSz = OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg() .withDescription("minimum block size (in KB)").create('b'); Option operation = OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg() .withDescription( "action: seek-only, create-only, seek-after-create").create( 'x'); Option rootDir = OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg() .withDescription( "specify root directory where files will be created.") .create('r'); Option file = OptionBuilder.withLongOpt("file").withArgName("name").hasArg() .withDescription("specify the file name to be created or read.") .create('f'); Option seekCount = OptionBuilder .withLongOpt("seek") .withArgName("count") .hasArg() .withDescription( "specify how many seek operations we perform (requires -x r or -x rw.") .create('n'); Option trialCount = OptionBuilder .withLongOpt("trials") .withArgName("n") .hasArg() .withDescription( "specify how many times to run the whole benchmark") .create('t'); Option useRawFs = OptionBuilder .withLongOpt("rawfs") .withDescription("use raw instead of checksummed file system") .create(); Option help = OptionBuilder.withLongOpt("help").hasArg(false).withDescription( "show this screen").create("h"); return new Options().addOption(compress).addOption(fileSize).addOption( fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen) .addOption(blockSz).addOption(rootDir).addOption(valueLen) .addOption(operation).addOption(seekCount).addOption(file) .addOption(trialCount).addOption(useRawFs).addOption(help); } private void processOptions(CommandLine line, Options opts) throws ParseException { // --help -h and --version -V must be processed first. if (line.hasOption('h')) { HelpFormatter formatter = new HelpFormatter(); System.out.println("TFile and SeqFile benchmark."); System.out.println(); formatter.printHelp(100, "java ... TestTFileSeqFileComparison [options]", "\nSupported options:", opts, ""); return; } if (line.hasOption('c')) { compress = line.getOptionValue('c'); } if (line.hasOption('d')) { dictSize = Integer.parseInt(line.getOptionValue('d')); } if (line.hasOption('s')) { fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024; } if (line.hasOption('i')) { fsInputBufferSize = Integer.parseInt(line.getOptionValue('i')); } if (line.hasOption('o')) { fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o')); } if (line.hasOption('n')) { seekCount = Integer.parseInt(line.getOptionValue('n')); } if (line.hasOption('t')) { trialCount = Integer.parseInt(line.getOptionValue('t')); } if (line.hasOption('k')) { IntegerRange ir = IntegerRange.parse(line.getOptionValue('k')); minKeyLen = ir.from(); maxKeyLen = ir.to(); } if (line.hasOption('v')) { IntegerRange ir = IntegerRange.parse(line.getOptionValue('v')); minValLength = ir.from(); maxValLength = ir.to(); } if (line.hasOption('b')) { minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024; } if (line.hasOption('r')) { rootDir = line.getOptionValue('r'); } if (line.hasOption('f')) { file = line.getOptionValue('f'); } if (line.hasOption('S')) { seed = Long.parseLong(line.getOptionValue('S')); } if (line.hasOption('x')) { String strOp = line.getOptionValue('x'); if (strOp.equals("r")) { op = OP_READ; } else if (strOp.equals("w")) { op = OP_CREATE; } else if (strOp.equals("rw")) { op = OP_CREATE | OP_READ; } else { throw new ParseException("Unknown action specifier: " + strOp); } } useRawFs = line.hasOption("rawfs"); proceed = true; } private void validateOptions() throws ParseException { if (!compress.equals("none") && !compress.equals("lzo") && !compress.equals("gz") && !compress.equals("snappy")) { throw new ParseException("Unknown compression scheme: " + compress); } if (minKeyLen >= maxKeyLen) { throw new ParseException( "Max key length must be greater than min key length."); } if (minValLength >= maxValLength) { throw new ParseException( "Max value length must be greater than min value length."); } if (minWordLen >= maxWordLen) { throw new ParseException( "Max word length must be greater than min word length."); } return; } private void setStopProceed() { proceed = false; } public boolean doCreate() { return (op & OP_CREATE) != 0; } public boolean doRead() { return (op & OP_READ) != 0; } } public static void main(String[] argv) throws IOException { TestHFileSeek testCase = new TestHFileSeek(); MyOptions options = new MyOptions(argv); if (options.proceed == false) { return; } testCase.options = options; for (int i = 0; i < options.trialCount; i++) { Log.info("Beginning trial " + (i+1)); testCase.setUp(); testCase.testSeeks(); testCase.tearDown(); } } }