/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import java.security.InvalidParameterException;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.MultiThreadedWriter;
import org.apache.hadoop.hbase.util.RegionSplitter;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Integration test that verifies lazy CF loading during scans: it does repeated scans with the
 * feature enabled while multiple threads continuously write values, and verifies the results.
 */
@Category(IntegrationTests.class)
public class IntegrationTestLazyCfLoading {
  private static final TableName TABLE_NAME =
    TableName.valueOf(IntegrationTestLazyCfLoading.class.getSimpleName());
  private static final String TIMEOUT_KEY = "hbase.%s.timeout";
  private static final String ENCODING_KEY = "hbase.%s.datablock.encoding";

  /** A soft test timeout; the duration of the test, as such, depends on the number of keys to put. */
  private static final int DEFAULT_TIMEOUT_MINUTES = 10;

  private static final int NUM_SERVERS = 1;
  /** Set regions per server low to ensure splits happen during the test. */
  private static final int REGIONS_PER_SERVER = 3;
  private static final int KEYS_TO_WRITE_PER_SERVER = 20000;
  private static final int WRITER_THREADS = 10;
  private static final int WAIT_BETWEEN_SCANS_MS = 1000;

  private static final Log LOG = LogFactory.getLog(IntegrationTestLazyCfLoading.class);
  private IntegrationTestingUtility util = new IntegrationTestingUtility();
  private final DataGenerator dataGen = new DataGenerator();

  /**
   * Custom LoadTestDataGenerator. Uses key generation and verification from LoadTestKVGenerator.
   * Creates three column families: the first with an integer column to filter on; the second with
   * an integer column that matches the first (for test-specific verification) plus a byte[] value
   * used for general verification; and the third with just the value.
   */
  private static class DataGenerator extends LoadTestDataGenerator {
    private static final int MIN_DATA_SIZE = 4096;
    private static final int MAX_DATA_SIZE = 65536;
    public static final byte[] ESSENTIAL_CF = Bytes.toBytes("essential");
    public static final byte[] JOINED_CF1 = Bytes.toBytes("joined");
    public static final byte[] JOINED_CF2 = Bytes.toBytes("joined2");
    public static final byte[] FILTER_COLUMN = Bytes.toBytes("filter");
    public static final byte[] VALUE_COLUMN = Bytes.toBytes("val");
    public static final long ACCEPTED_VALUE = 1L;

    private static final Map<byte[], byte[][]> columnMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);

    private final AtomicLong expectedNumberOfKeys = new AtomicLong(0);
    private final AtomicLong totalNumberOfKeys = new AtomicLong(0);

    public DataGenerator() {
      super(MIN_DATA_SIZE, MAX_DATA_SIZE);
      columnMap.put(ESSENTIAL_CF, new byte[][] { FILTER_COLUMN });
      columnMap.put(JOINED_CF1, new byte[][] { FILTER_COLUMN, VALUE_COLUMN });
      columnMap.put(JOINED_CF2, new byte[][] { VALUE_COLUMN });
    }

    public long getExpectedNumberOfKeys() {
      return expectedNumberOfKeys.get();
    }

    public long getTotalNumberOfKeys() {
      return totalNumberOfKeys.get();
    }

    @Override
    public byte[] getDeterministicUniqueKey(long keyBase) {
      return LoadTestKVGenerator.md5PrefixedKey(keyBase).getBytes();
    }

    @Override
    public byte[][] getColumnFamilies() {
      return columnMap.keySet().toArray(new byte[columnMap.size()][]);
    }

    @Override
    public byte[][] generateColumnsForCf(byte[] rowKey, byte[] cf) {
      return columnMap.get(cf);
    }
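    // A note on the filter column values generated below: ACCEPTED_VALUE is 1, so masking the
    // numeric value of the first four hex digits of the MD5-prefixed row key keeps only its
    // lowest bit. This deterministically turns roughly half of the rows "on" (value 1) and the
    // rest "off" (value 0), letting scan results be re-verified from nothing but the row key
    // and the two atomic counters.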
    @Override
    public byte[] generateValue(byte[] rowKey, byte[] cf, byte[] column) {
      if (Bytes.BYTES_COMPARATOR.compare(column, FILTER_COLUMN) == 0) {
        // Random deterministic way to make some values "on" and others "off" for filters.
        long value = Long.parseLong(Bytes.toString(rowKey, 0, 4), 16) & ACCEPTED_VALUE;
        if (Bytes.BYTES_COMPARATOR.compare(cf, ESSENTIAL_CF) == 0) {
          totalNumberOfKeys.incrementAndGet();
          if (value == ACCEPTED_VALUE) {
            expectedNumberOfKeys.incrementAndGet();
          }
        }
        return Bytes.toBytes(value);
      } else if (Bytes.BYTES_COMPARATOR.compare(column, VALUE_COLUMN) == 0) {
        return kvGenerator.generateRandomSizeValue(rowKey, cf, column);
      }
      String error = "Unknown column " + Bytes.toString(column);
      assert false : error;
      throw new InvalidParameterException(error);
    }

    @Override
    public boolean verify(byte[] rowKey, byte[] cf, byte[] column, byte[] value) {
      if (Bytes.BYTES_COMPARATOR.compare(column, FILTER_COLUMN) == 0) {
        // Relies on the filter from getScanFilter being used.
        return Bytes.toLong(value) == ACCEPTED_VALUE;
      } else if (Bytes.BYTES_COMPARATOR.compare(column, VALUE_COLUMN) == 0) {
        return LoadTestKVGenerator.verify(value, rowKey, cf, column);
      }
      return false; // some bogus value from the read; we don't expect any such thing.
    }

    @Override
    public boolean verify(byte[] rowKey, byte[] cf, Set<byte[]> columnSet) {
      return columnMap.get(cf).length == columnSet.size();
    }
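    // The filter returned below is what exercises lazy CF loading: with filterIfMissing set,
    // SingleColumnValueFilter treats only its own family (ESSENTIAL_CF) as essential, so a scan
    // with on-demand CF loading enabled should read the joined families only for rows whose
    // essential column matches ACCEPTED_VALUE. Setting filterIfMissing also drops rows whose
    // essential column has not been written yet, which can happen while writers are running.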
    public Filter getScanFilter() {
      SingleColumnValueFilter scf = new SingleColumnValueFilter(ESSENTIAL_CF, FILTER_COLUMN,
        CompareFilter.CompareOp.EQUAL, Bytes.toBytes(ACCEPTED_VALUE));
      scf.setFilterIfMissing(true);
      return scf;
    }
  }

  @Before
  public void setUp() throws Exception {
    LOG.info("Initializing cluster with " + NUM_SERVERS + " servers");
    util.initializeCluster(NUM_SERVERS);
    LOG.info("Done initializing cluster");
    createTable();
    // After table creation, ACLs need time to be propagated to RSs in a secure deployment,
    // so we sleep a little bit because we don't have a good way to know when permissions
    // are received by RSs.
    Thread.sleep(3000);
  }

  private void createTable() throws Exception {
    deleteTable();
    LOG.info("Creating table");
    Configuration conf = util.getConfiguration();
    String encodingKey = String.format(ENCODING_KEY, this.getClass().getSimpleName());
    DataBlockEncoding blockEncoding = DataBlockEncoding.valueOf(conf.get(encodingKey, "FAST_DIFF"));
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    for (byte[] cf : dataGen.getColumnFamilies()) {
      HColumnDescriptor hcd = new HColumnDescriptor(cf);
      hcd.setDataBlockEncoding(blockEncoding);
      htd.addFamily(hcd);
    }
    int serverCount = util.getHBaseClusterInterface().getClusterStatus().getServersSize();
    byte[][] splits = new RegionSplitter.HexStringSplit().split(serverCount * REGIONS_PER_SERVER);
    util.getAdmin().createTable(htd, splits);
    LOG.info("Created table");
  }

  private void deleteTable() throws Exception {
    if (util.getAdmin().tableExists(TABLE_NAME)) {
      LOG.info("Deleting table");
      util.deleteTable(TABLE_NAME);
      LOG.info("Deleted table");
    }
  }

  @After
  public void tearDown() throws Exception {
    deleteTable();
    LOG.info("Restoring the cluster");
    util.restoreCluster();
    LOG.info("Done restoring the cluster");
  }

  @Test
  public void testReadersAndWriters() throws Exception {
    Configuration conf = util.getConfiguration();
    String timeoutKey = String.format(TIMEOUT_KEY, this.getClass().getSimpleName());
    // The maximum runtime is specified in minutes.
    long maxRuntime = conf.getLong(timeoutKey, DEFAULT_TIMEOUT_MINUTES);
    long serverCount = util.getHBaseClusterInterface().getClusterStatus().getServersSize();
    long keysToWrite = serverCount * KEYS_TO_WRITE_PER_SERVER;
    Connection connection = ConnectionFactory.createConnection(conf);
    Table table = connection.getTable(TABLE_NAME);

    // Create the multi-threaded writer and start it. We write multiple columns/CFs and verify
    // their integrity, therefore multi-put is necessary.
    MultiThreadedWriter writer = new MultiThreadedWriter(dataGen, conf, TABLE_NAME);
    writer.setMultiPut(true);

    LOG.info("Starting writer; the number of keys to write is " + keysToWrite);
    // TODO: Need to see if tag support has to be given here in the integration test suite.
    writer.start(1, keysToWrite, WRITER_THREADS);

    // Now, do scans.
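    // Scan in a loop while the writer is still running; each pass verifies whatever has been
    // written so far. isDone() is sampled before each scan, so once it reports true, the final
    // scan runs over the complete data set before the loop exits.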
    long now = EnvironmentEdgeManager.currentTime();
    long timeLimit = now + (maxRuntime * 60000);
    boolean isWriterDone = false;
    while (now < timeLimit && !isWriterDone) {
      LOG.info("Starting the scan; wrote approximately " + dataGen.getTotalNumberOfKeys()
        + " keys");
      isWriterDone = writer.isDone();
      if (isWriterDone) {
        LOG.info("Scanning full result, writer is done");
      }
      Scan scan = new Scan();
      for (byte[] cf : dataGen.getColumnFamilies()) {
        scan.addFamily(cf);
      }
      scan.setFilter(dataGen.getScanFilter());
      scan.setLoadColumnFamiliesOnDemand(true);
      // The number of keys we can expect from the scan - a lower bound (sampled before the scan).
      // Not a strict lower bound - the writer knows nothing about filters, so we report this from
      // the generator; the writer might have generated a value but not put it yet.
      long onesGennedBeforeScan = dataGen.getExpectedNumberOfKeys();
      long startTs = EnvironmentEdgeManager.currentTime();
      ResultScanner results = table.getScanner(scan);
      long resultCount = 0;
      Result result = null;
      // Verify and count the results.
      while ((result = results.next()) != null) {
        boolean isOk = writer.verifyResultAgainstDataGenerator(result, true, true);
        Assert.assertTrue("Failed to verify [" + Bytes.toString(result.getRow()) + "]", isOk);
        ++resultCount;
      }
      long timeTaken = EnvironmentEdgeManager.currentTime() - startTs;
      // Verify the result count.
      long onesGennedAfterScan = dataGen.getExpectedNumberOfKeys();
      Assert.assertTrue("Read " + resultCount + " keys when at most " + onesGennedAfterScan
        + " were generated", onesGennedAfterScan >= resultCount);
      if (isWriterDone) {
        Assert.assertTrue("Read " + resultCount + " keys; the writer is done and "
          + onesGennedAfterScan + " keys were generated", onesGennedAfterScan == resultCount);
      } else if (onesGennedBeforeScan * 0.9 > resultCount) {
        LOG.warn("Read way too few keys (" + resultCount + "/" + onesGennedBeforeScan
          + ") - there might be a problem, or the writer might just be slow");
      }
      LOG.info("Scan took " + timeTaken + "ms");
      if (!isWriterDone) {
        Thread.sleep(WAIT_BETWEEN_SCANS_MS);
        now = EnvironmentEdgeManager.currentTime();
      }
    }
    Assert.assertEquals("There are write failures", 0, writer.getNumWriteFailures());
    Assert.assertTrue("Writer is not done", isWriterDone);
    connection.close();
  }
}