/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapreduce;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles.LoadQueueItem;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat
 */
@Category({MapReduceTests.class, LargeTests.class})
public class TestLoadIncrementalHFiles {
  @Rule
  public TestName tn = new TestName();

  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  private static final String NAMESPACE = "bulkNS";

  static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
  static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  static HBaseTestingUtility util = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
    util.getConfiguration().setInt(
      LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
      MAX_FILES_PER_REGION_PER_FAMILY);
    // change default behavior so that tag values are returned with normal rpcs
    util.getConfiguration().set(HConstants.RPC_CODEC_CONF_KEY,
      KeyValueCodecWithTags.class.getCanonicalName());
    util.startMiniCluster();
    setupNamespace();
  }

  /** Creates the extra namespace that each runTest() variant also bulk loads into. */
  protected static void setupNamespace() throws Exception {
    util.getAdmin().createNamespace(NamespaceDescriptor.create(NAMESPACE).build());
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  @Test(timeout = 120000)
  public void testSimpleLoadWithMap() throws Exception {
    runTest("testSimpleLoadWithMap", BloomType.NONE,
      new byte[][][] {
        new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
        new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
      }, true);
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions
   */
  @Test(timeout = 120000)
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
      new byte[][][] {
        new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
        new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
      });
  }

  @Test(timeout = 120000)
  public void testSimpleLoadWithFileCopy() throws Exception {
    String testName = tn.getMethodName();
    final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
    runTest(testName, buildHTD(TableName.valueOf(TABLE_NAME), BloomType.NONE), BloomType.NONE,
      false, null, new byte[][][] {
        new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
        new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
      }, false, true);
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions
   */
  @Test(timeout = 120000)
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
      new byte[][][] {
        new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
        new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
      });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test(timeout = 60000)
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
      new byte[][][] {
        new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
        new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
      });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test(timeout = 120000)
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
      new byte[][][] {
        new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
        new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
      });
  }

  /**
   * Test case that creates some regions and loads HFiles that have
   * different region boundaries than the table pre-split.
   */
  @Test(timeout = 120000)
  public void testSimpleHFileSplit() throws Exception {
    runTest("testHFileSplit", BloomType.NONE,
      new byte[][] {
        Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
        Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
      },
      new byte[][][] {
        new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("lll") },
        new byte[][] { Bytes.toBytes("mmm"), Bytes.toBytes("zzz") },
      }
    );
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries
   * and have different region boundaries than the table pre-split.
   */
  @Test(timeout = 60000)
  public void testRegionCrossingHFileSplit() throws Exception {
    testRegionCrossingHFileSplit(BloomType.NONE);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries
   * have a ROW bloom filter and a different region boundaries than the table pre-split.
   */
  @Test(timeout = 120000)
  public void testRegionCrossingHFileSplitRowBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROW);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries
   * have a ROWCOL bloom filter and a different region boundaries than the table pre-split.
   */
  @Test(timeout = 120000)
  public void testRegionCrossingHFileSplitRowColBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROWCOL);
  }

  @Test
  public void testSplitALot() throws Exception {
    runTest("testSplitALot", BloomType.NONE,
      new byte[][] {
        Bytes.toBytes("aaaa"), Bytes.toBytes("bbb"),
        Bytes.toBytes("ccc"), Bytes.toBytes("ddd"),
        Bytes.toBytes("eee"), Bytes.toBytes("fff"),
        Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
        Bytes.toBytes("iii"), Bytes.toBytes("lll"),
        Bytes.toBytes("mmm"), Bytes.toBytes("nnn"),
        Bytes.toBytes("ooo"), Bytes.toBytes("ppp"),
        Bytes.toBytes("qqq"), Bytes.toBytes("rrr"),
        Bytes.toBytes("sss"), Bytes.toBytes("ttt"),
        Bytes.toBytes("uuu"), Bytes.toBytes("vvv"),
        Bytes.toBytes("zzz"),
      },
      new byte[][][] {
        new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("zzz") },
      }
    );
  }

  /** Region-crossing HFile split scenario, parameterized over the bloom filter type. */
  private void testRegionCrossingHFileSplit(BloomType bloomType) throws Exception {
    runTest("testHFileSplit" + bloomType + "Bloom", bloomType,
      new byte[][] {
        Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
        Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
      },
      new byte[][][] {
        new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
        new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
      }
    );
  }

  /** Builds a one-family table descriptor with the given bloom filter type on FAMILY. */
  private HTableDescriptor buildHTD(TableName tableName, BloomType bloomType) {
    HTableDescriptor htd = new HTableDescriptor(tableName);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);
    return htd;
  }

  private void runTest(String testName, BloomType bloomType,
      byte[][][] hfileRanges) throws Exception {
    runTest(testName, bloomType, null, hfileRanges);
  }

  private void runTest(String testName, BloomType bloomType,
      byte[][][] hfileRanges, boolean useMap) throws Exception {
    runTest(testName, bloomType, null, hfileRanges, useMap);
  }

  private void runTest(String testName, BloomType bloomType,
      byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    runTest(testName, bloomType, tableSplitKeys, hfileRanges, false);
  }

  /**
   * Runs the bulk load twice: once against the default namespace and once against
   * the {@link #NAMESPACE} namespace, so both code paths are exercised.
   */
  private void runTest(String testName, BloomType bloomType,
      byte[][] tableSplitKeys, byte[][][] hfileRanges, boolean useMap) throws Exception {
    final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
    // Pre-create the table only when explicit split keys were supplied.
    final boolean preCreateTable = tableSplitKeys != null;

    // Run the test bulkloading the table to the default namespace
    final TableName TABLE_WITHOUT_NS = TableName.valueOf(TABLE_NAME);
    runTest(testName, TABLE_WITHOUT_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges,
      useMap);

    // Run the test bulkloading the table to the specified namespace
    final TableName TABLE_WITH_NS = TableName.valueOf(Bytes.toBytes(NAMESPACE), TABLE_NAME);
    runTest(testName, TABLE_WITH_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges,
      useMap);
  }

  private void runTest(String testName, TableName tableName, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges, boolean useMap)
      throws Exception {
    HTableDescriptor htd = buildHTD(tableName, bloomType);
    runTest(testName, htd, bloomType, preCreateTable, tableSplitKeys, hfileRanges, useMap, false);
  }

  /**
   * Writes one HFile per (from, to) range under a family directory, bulk loads them into the
   * table described by {@code htd}, and verifies the resulting row count.
   *
   * @param useMap      when true, loads via the family-to-file map API instead of the CLI args
   * @param deleteFile  when true (and useMap), deletes the last generated HFile before loading
   *                    and asserts it is reported as missing
   * @param copyFiles   when true, configures the loader to always copy (not move) source files
   *                    and asserts the originals still exist afterwards
   * @param initRowCount number of rows already in the table before this load
   * @param factor      number of rows written into each HFile
   * @return the number of rows this call loaded
   */
  public static int loadHFiles(String testName, HTableDescriptor htd, HBaseTestingUtility util,
      byte[] fam, byte[] qual, boolean preCreateTable, byte[][] tableSplitKeys,
      byte[][][] hfileRanges, boolean useMap, boolean deleteFile,
      boolean copyFiles, int initRowCount, int factor) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(fam));

    int hfileIdx = 0;
    Map<byte[], List<Path>> map = null;
    List<Path> list = null;
    if (useMap || copyFiles) {
      list = new ArrayList<>();
    }
    if (useMap) {
      map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
      map.put(fam, list);
    }
    Path last = null;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      Path path = new Path(familyDir, "hfile_" + hfileIdx++);
      HFileTestUtil.createHFile(util.getConfiguration(), fs, path, fam, qual, from, to, factor);
      if (useMap) {
        last = path;
        list.add(path);
      }
    }
    int expectedRows = hfileIdx * factor;

    final TableName tableName = htd.getTableName();
    if (!util.getAdmin().tableExists(tableName) && (preCreateTable || map != null)) {
      util.getAdmin().createTable(htd, tableSplitKeys);
    }

    Configuration conf = util.getConfiguration();
    if (copyFiles) {
      conf.setBoolean(LoadIncrementalHFiles.ALWAYS_COPY_FILES, true);
    }
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { dir.toString(), tableName.toString() };
    if (useMap) {
      if (deleteFile) {
        // recursive=true mirrors the semantics of the deprecated delete(Path) overload
        fs.delete(last, true);
      }
      Map<LoadQueueItem, ByteBuffer> loaded = loader.run(null, map, tableName);
      if (deleteFile) {
        // The deleted HFile held `factor` rows that can never arrive.
        expectedRows -= factor;
        for (LoadQueueItem item : loaded.keySet()) {
          if (item.hfilePath.getName().equals(last.getName())) {
            fail(last + " should be missing");
          }
        }
      }
    } else {
      loader.run(args);
    }

    if (copyFiles) {
      for (Path p : list) {
        assertTrue(p + " should exist", fs.exists(p));
      }
    }

    Table table = util.getConnection().getTable(tableName);
    try {
      assertEquals(initRowCount + expectedRows, util.countRows(table));
    } finally {
      table.close();
    }
    return expectedRows;
  }

  /**
   * Bulk loads via {@link #loadHFiles}, then verifies the bulkload staging directory
   * was cleaned up and drops the table.
   */
  private void runTest(String testName, HTableDescriptor htd, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges, boolean useMap,
      boolean copyFiles) throws Exception {
    loadHFiles(testName, htd, util, FAMILY, QUALIFIER, preCreateTable, tableSplitKeys,
      hfileRanges, useMap, true, copyFiles, 0, 1000);

    final TableName tableName = htd.getTableName();
    // verify staging folder has been cleaned up
    Path stagingBasePath =
        new Path(FSUtils.getRootDir(util.getConfiguration()), HConstants.BULKLOAD_STAGING_DIR_NAME);
    FileSystem fs = util.getTestFileSystem();
    if (fs.exists(stagingBasePath)) {
      FileStatus[] files = fs.listStatus(stagingBasePath);
      for (FileStatus file : files) {
        // Must compare with equals(): getName() returns a fresh String, so the old
        // reference comparison (!=) was always true and never caught leftovers.
        assertTrue("Folder=" + file.getPath() + " is not cleaned up.",
          !file.getPath().getName().equals("DONOTERASE"));
      }
    }
    util.deleteTable(tableName);
  }

  /**
   * Test that tags survive through a bulk load that needs to split hfiles.
   *
   * This test depends on the "hbase.client.rpc.codec" = KeyValueCodecWithTags so that the client
   * can get tags in the responses.
   */
  @Test(timeout = 60000)
  public void testTagsSurviveBulkLoadSplit() throws Exception {
    Path dir = util.getDataTestDirOnTestFS(tn.getMethodName());
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));
    // table has these split points
    byte[][] tableSplitKeys = new byte[][] {
      Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
      Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
    };

    // creating an hfile that has values that span the split points.
    byte[] from = Bytes.toBytes("ddd");
    byte[] to = Bytes.toBytes("ooo");
    HFileTestUtil.createHFileWithTags(util.getConfiguration(), fs,
      new Path(familyDir, tn.getMethodName() + "_hfile"),
      FAMILY, QUALIFIER, from, to, 1000);
    int expectedRows = 1000;

    TableName tableName = TableName.valueOf(tn.getMethodName());
    HTableDescriptor htd = buildHTD(tableName, BloomType.NONE);
    util.getAdmin().createTable(htd, tableSplitKeys);

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), tableName.toString() };
    loader.run(args);

    Table table = util.getConnection().getTable(tableName);
    try {
      assertEquals(expectedRows, util.countRows(table));
      HFileTestUtil.verifyTags(table);
    } finally {
      table.close();
    }

    util.deleteTable(tableName);
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test(timeout = 60000)
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = tn.getMethodName();
    byte[][][] hFileRanges = new byte[][][] {
      new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    // set real family name to upper case in purpose to simulate the case that
    // family name in HFiles is invalid
    HColumnDescriptor family =
        new HColumnDescriptor(Bytes.toBytes(Bytes.toString(FAMILY).toUpperCase(Locale.ROOT)));
    htd.addFamily(family);

    try {
      runTest(testName, htd, BloomType.NONE, true, SPLIT_KEYS, hFileRanges, false, false);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
      // further check whether the exception message is correct
      String errMsg = e.getMessage();
      assertTrue("Incorrect exception message, expected message: ["
        + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
        errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
    }
  }

  @Test(timeout = 120000)
  public void testNonHfileFolderWithUnmatchedFamilyName() throws Exception {
    testNonHfileFolder("testNonHfileFolderWithUnmatchedFamilyName", true);
  }

  @Test(timeout = 120000)
  public void testNonHfileFolder() throws Exception {
    testNonHfileFolder("testNonHfileFolder", false);
  }

  /**
   * Write a random data file and a non-file in a dir with a valid family name
   * but not part of the table families. we should we able to bulkload without
   * getting the unmatched family exception. HBASE-13037/HBASE-13227
   */
  private void testNonHfileFolder(String tableName, boolean preCreateTable) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(tableName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);

    Path familyDir = new Path(dir, Bytes.toString(FAMILY));
    HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_0"),
      FAMILY, QUALIFIER, Bytes.toBytes("begin"), Bytes.toBytes("end"), 500);
    createRandomDataFile(fs, new Path(familyDir, "012356789"), 16 * 1024);

    final String NON_FAMILY_FOLDER = "_logs";
    Path nonFamilyDir = new Path(dir, NON_FAMILY_FOLDER);
    fs.mkdirs(nonFamilyDir);
    fs.mkdirs(new Path(nonFamilyDir, "non-file"));
    createRandomDataFile(fs, new Path(nonFamilyDir, "012356789"), 16 * 1024);

    Table table = null;
    try {
      if (preCreateTable) {
        table = util.createTable(TableName.valueOf(tableName), FAMILY);
      } else {
        table = util.getConnection().getTable(TableName.valueOf(tableName));
      }

      final String[] args = { dir.toString(), tableName };
      new LoadIncrementalHFiles(util.getConfiguration()).run(args);
      assertEquals(500, util.countRows(table));
    } finally {
      if (table != null) {
        table.close();
      }
      fs.delete(dir, true);
    }
  }

  /**
   * Writes {@code size} bytes of a repeating 0x00..0xff pattern to {@code path};
   * deliberately not a valid HFile so the loader must skip it.
   */
  private static void createRandomDataFile(FileSystem fs, Path path, int size)
      throws IOException {
    try (FSDataOutputStream stream = fs.create(path)) {
      byte[] data = new byte[1024];
      for (int i = 0; i < data.length; ++i) {
        data[i] = (byte) (i & 0xff);
      }
      while (size >= data.length) {
        stream.write(data, 0, data.length);
        size -= data.length;
      }
      if (size > 0) {
        stream.write(data, 0, size);
      }
    }
  }

  @Test(timeout = 120000)
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
      Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");
    LoadIncrementalHFiles.splitStoreFile(
      util.getConfiguration(), testIn,
      familyDesc, Bytes.toBytes("ggg"),
      bottomOut,
      topOut);

    // No rows may be lost or duplicated by the split.
    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

  @Test
  public void testSplitStoreFileWithNoneToNone() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.NONE, DataBlockEncoding.NONE);
  }

  @Test
  public void testSplitStoreFileWithEncodedToEncoded() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.DIFF, DataBlockEncoding.DIFF);
  }

  @Test
  public void testSplitStoreFileWithEncodedToNone() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.DIFF, DataBlockEncoding.NONE);
  }

  @Test
  public void testSplitStoreFileWithNoneToEncoded() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.NONE, DataBlockEncoding.DIFF);
  }

  /**
   * Splits an HFile written with {@code bulkloadEncoding} against a column family configured
   * with {@code cfEncoding} and verifies no rows are lost across the two halves.
   */
  private void testSplitStoreFileWithDifferentEncoding(DataBlockEncoding bulkloadEncoding,
      DataBlockEncoding cfEncoding) throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFileWithDifferentEncoding");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setDataBlockEncoding(cfEncoding);
    HFileTestUtil.createHFileWithDataBlockEncoding(
      util.getConfiguration(), fs, testIn, bulkloadEncoding,
      FAMILY, QUALIFIER, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");
    LoadIncrementalHFiles.splitStoreFile(
      util.getConfiguration(), testIn,
      familyDesc, Bytes.toBytes("ggg"),
      bottomOut,
      topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

  /** Scans an HFile end to end and returns its (non-zero) cell count. */
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader =
        HFile.createReader(p.getFileSystem(conf), p, new CacheConfig(conf), true, conf);
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    scanner.seekTo();
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

  /**
   * Records a region [first, last) in the boundary map: +1 at the start key,
   * -1 at the end key, matching what inferBoundaries() expects.
   */
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer start = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, start + 1);
    Integer end = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, end - 1);
  }

  @Test(timeout = 120000)
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t
     *  a------e    g-----k   m-------------q   r----s     u----w
     *
     * Should be inferred as:
     *  a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m,r,u)
     */
    // Bytes.toBytes() (not String.getBytes()) keeps the encoding platform-independent.
    String[][] ranges = {
      { "a", "e" }, { "r", "s" }, { "o", "p" }, { "g", "k" }, { "v", "x" },
      { "c", "i" }, { "m", "q" }, { "s", "t" }, { "u", "w" },
    };
    for (String[] range : ranges) {
      addStartEndKeysForTest(map, Bytes.toBytes(range[0]), Bytes.toBytes(range[1]));
    }

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[][] {
      Bytes.toBytes("m"), Bytes.toBytes("r"), Bytes.toBytes("u")
    };

    assertEquals(3, keysArray.length);
    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

  @Test(timeout = 60000)
  public void testLoadTooMayHFiles() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("testLoadTooMayHFiles");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    // Create one more file than the configured per-region/per-family maximum.
    for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
        + i), FAMILY, QUALIFIER, from, to, 1000);
    }

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), "mytable_testLoadTooMayHFiles" };
    try {
      loader.run(args);
      fail("Bulk loading too many files should fail");
    } catch (IOException ie) {
      assertTrue(ie.getMessage().contains("Trying to load more than "
        + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
    }
  }

  @Test(expected = TableNotFoundException.class)
  public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.set(LoadIncrementalHFiles.CREATE_TABLE_CONF_KEY, "no");
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { "directory", "nonExistingTable" };
    loader.run(args);
  }

  @Test(timeout = 120000)
  public void testTableWithCFNameStartWithUnderScore() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    // Leading underscore would collide with internal dirs like "_logs" unless handled.
    String family = "_cf";
    Path familyDir = new Path(dir, family);

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    Configuration conf = util.getConfiguration();
    String tableName = tn.getMethodName();
    Table table = util.createTable(TableName.valueOf(tableName), family);
    HFileTestUtil.createHFile(conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family),
      QUALIFIER, from, to, 1000);

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { dir.toString(), tableName };
    try {
      loader.run(args);
      assertEquals(1000, util.countRows(table));
    } finally {
      if (null != table) {
        table.close();
      }
    }
  }
}