/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.mapreduce.WALInputFormat.WALKeyRecordReader;
import org.apache.hadoop.hbase.mapreduce.WALInputFormat.WALRecordReader;
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.MapReduceTestUtil;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * JUnit tests for the WALRecordReader
 */
@Category({MapReduceTests.class, MediumTests.class})
public class TestWALRecordReader {
  private static final Log LOG = LogFactory.getLog(TestWALRecordReader.class);
  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private static Configuration conf;
  private static FileSystem fs;
  private static Path hbaseDir;
  private static FileSystem walFs;
  private static Path walRootDir;
  // visible for TestHLogRecordReader
  static final TableName tableName = TableName.valueOf(getName());
  private static final byte [] rowName = tableName.getName();
  // visible for TestHLogRecordReader
  static final HRegionInfo info = new HRegionInfo(tableName,
      Bytes.toBytes(""), Bytes.toBytes(""), false);
  private static final byte [] family = Bytes.toBytes("column");

  private static final byte [] value = Bytes.toBytes("value");
  private static HTableDescriptor htd;
  private static Path logDir;
  protected MultiVersionConcurrencyControl mvcc;
  protected static
      NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);

  private static String getName() {
    return "TestWALRecordReader";
  }

  @Before
  public void setUp() throws Exception {
    fs.delete(hbaseDir, true);
    walFs.delete(walRootDir, true);
    mvcc = new MultiVersionConcurrencyControl();
  }

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    // Make block sizes small.
    conf = TEST_UTIL.getConfiguration();
    conf.setInt("dfs.blocksize", 1024 * 1024);
    conf.setInt("dfs.replication", 1);
    TEST_UTIL.startMiniDFSCluster(1);

    conf = TEST_UTIL.getConfiguration();
    fs = TEST_UTIL.getDFSCluster().getFileSystem();

    hbaseDir = TEST_UTIL.createRootDir();
    walRootDir = TEST_UTIL.createWALRootDir();
    walFs = FSUtils.getWALFileSystem(conf);
    logDir = new Path(walRootDir, HConstants.HREGION_LOGDIR_NAME);

    htd = new HTableDescriptor(tableName);
    htd.addFamily(new HColumnDescriptor(family));
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    fs.delete(hbaseDir, true);
    walFs.delete(walRootDir, true);
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Test partial reads from the log based on passed time range
   * @throws Exception
   */
  @Test
  public void testPartialRead() throws Exception {
    final WALFactory walfactory = new WALFactory(conf, null, getName());
    WAL log = walfactory.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace());
    // This test depends on timestamp being millisecond based and the filename of the WAL also
    // being millisecond based.
    long ts = System.currentTimeMillis();
    WALEdit edit = new WALEdit();
    edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"), ts, value));
    log.append(info, getWalKey(ts, scopes), edit, true);
    edit = new WALEdit();
    edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"), ts+1, value));
    log.append(info, getWalKey(ts+1, scopes), edit, true);
    log.sync();
    LOG.info("Before 1st WAL roll " + log.toString());
    log.rollWriter();
    LOG.info("Past 1st WAL roll " + log.toString());

    Thread.sleep(1);
    long ts1 = System.currentTimeMillis();

    edit = new WALEdit();
    edit.add(new KeyValue(rowName, family, Bytes.toBytes("3"), ts1+1, value));
    log.append(info, getWalKey(ts1+1, scopes), edit, true);
    edit = new WALEdit();
    edit.add(new KeyValue(rowName, family, Bytes.toBytes("4"), ts1+2, value));
    log.append(info, getWalKey(ts1+2, scopes), edit, true);
    log.sync();
    log.shutdown();
    walfactory.shutdown();
    LOG.info("Closed WAL " + log.toString());

    WALInputFormat input = new WALInputFormat();
    Configuration jobConf = new Configuration(conf);
    jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
    jobConf.setLong(WALInputFormat.END_TIME_KEY, ts);

    // only 1st file is considered, and only its 1st entry is used
    List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));

    assertEquals(1, splits.size());
    testSplit(splits.get(0), Bytes.toBytes("1"));

    jobConf.setLong(WALInputFormat.START_TIME_KEY, ts+1);
    jobConf.setLong(WALInputFormat.END_TIME_KEY, ts1+1);
    splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
    // both files need to be considered
    assertEquals(2, splits.size());
    // only the 2nd entry from the 1st file is used
    testSplit(splits.get(0), Bytes.toBytes("2"));
    // only the 1st entry from the 2nd file is used
    testSplit(splits.get(1), Bytes.toBytes("3"));
  }

  /**
   * Test basic functionality
   * @throws Exception
   */
  @Test
  public void testWALRecordReader() throws Exception {
    final WALFactory walfactory = new WALFactory(conf, null, getName());
    WAL log = walfactory.getWAL(info.getEncodedNameAsBytes(),
        info.getTable().getNamespace());
    byte [] value = Bytes.toBytes("value");
    final AtomicLong sequenceId = new AtomicLong(0);
    WALEdit edit = new WALEdit();
    edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"),
        System.currentTimeMillis(), value));
    long txid = log.append(info, getWalKey(System.currentTimeMillis(), scopes), edit, true);
    log.sync(txid);

    Thread.sleep(1); // make sure 2nd log gets a later timestamp
    long secondTs = System.currentTimeMillis();
    log.rollWriter();

    edit = new WALEdit();
    edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"),
        System.currentTimeMillis(), value));
    txid = log.append(info, getWalKey(System.currentTimeMillis(), scopes), edit, true);
    log.sync(txid);
    log.shutdown();
    walfactory.shutdown();
    long thirdTs = System.currentTimeMillis();

    // should have 2 log files now
    WALInputFormat input = new WALInputFormat();
    Configuration jobConf = new Configuration(conf);
    jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());

    // make sure both logs are found
    List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
    assertEquals(2, splits.size());

    // should return exactly one KV
    testSplit(splits.get(0), Bytes.toBytes("1"));
    // same for the 2nd split
    testSplit(splits.get(1), Bytes.toBytes("2"));

    // now test basic time ranges:

    // set an endtime, the 2nd log file can be ignored completely.
    jobConf.setLong(WALInputFormat.END_TIME_KEY, secondTs-1);
    splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
    assertEquals(1, splits.size());
    testSplit(splits.get(0), Bytes.toBytes("1"));

    // now set a start time
    jobConf.setLong(WALInputFormat.END_TIME_KEY, Long.MAX_VALUE);
    jobConf.setLong(WALInputFormat.START_TIME_KEY, thirdTs);
    splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
    // both logs need to be considered
    assertEquals(2, splits.size());
    // but both readers skip all edits
    testSplit(splits.get(0));
    testSplit(splits.get(1));
  }

  protected WALKey getWalKey(final long time, NavigableMap<byte[], Integer> scopes) {
    return new WALKey(info.getEncodedNameAsBytes(), tableName, time, mvcc, scopes);
  }

  protected WALRecordReader getReader() {
    return new WALKeyRecordReader();
  }

  /**
   * Create a new reader from the split, and match the edits against the passed columns.
   */
  private void testSplit(InputSplit split, byte[]... columns) throws Exception {
    final WALRecordReader reader = getReader();
    reader.initialize(split, MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));

    for (byte[] column : columns) {
      assertTrue(reader.nextKeyValue());
      Cell cell = reader.getCurrentValue().getCells().get(0);
      if (!Bytes.equals(column, 0, column.length, cell.getQualifierArray(),
          cell.getQualifierOffset(), cell.getQualifierLength())) {
        assertTrue(
            "expected [" + Bytes.toString(column) + "], actual ["
                + Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(),
                    cell.getQualifierLength()) + "]",
            false);
      }
    }
    assertFalse(reader.nextKeyValue());
    reader.close();
  }
}