TestMultiColumnScanner.java example

Explorer
hbase-trunk-mttr-master
/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

/**
 * Tests optimized scanning of multiple columns.
 * <p>
 * The test writes the same set of timestamped cells for a pseudo-randomly
 * chosen subset of row/column pairs into several flushed store files, issues a
 * few column deletes, and then compares the scanner output for every
 * non-empty subset of the column qualifiers and every max-versions setting
 * against an expected result that is computed in memory.
 */
@RunWith(Parameterized.class)
@Category(MediumTests.class)
public class TestMultiColumnScanner {

  private static final Log LOG = LogFactory.getLog(TestMultiColumnScanner.class);

  private static final String TABLE_NAME =
      TestMultiColumnScanner.class.getSimpleName();

  static final int MAX_VERSIONS = 50;

  private static final String FAMILY = "CF";
  private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);

  /**
   * The size of the column qualifier set used. Increasing this parameter
   * exponentially increases test time.
   */
  private static final int NUM_COLUMNS = 8;

  /**
   * Bit mask with one bit set for each of the {@link #NUM_COLUMNS} qualifiers.
   * The parentheses are required: binary minus binds tighter than the shift
   * operator, so {@code 1 << NUM_COLUMNS - 1} would only set the highest bit.
   */
  private static final int MAX_COLUMN_BIT_MASK = (1 << NUM_COLUMNS) - 1;
  private static final int NUM_FLUSHES = 10;
  private static final int NUM_ROWS = 20;

  /** A large value of type long for use as a timestamp */
  private static final long BIG_LONG = 9111222333444555666L;

  /**
   * Timestamps to test with. Cannot use {@link Long#MAX_VALUE} here, because
   * it will be replaced by a timestamp auto-generated based on the time.
   */
  private static final long[] TIMESTAMPS = new long[] { 1, 3, 5,
      Integer.MAX_VALUE, BIG_LONG, Long.MAX_VALUE - 1 };

  /** The probability that a column is skipped in a store file. */
  private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7;

  /** The probability of skipping a column in a single row */
  private static final double COLUMN_SKIP_IN_ROW_PROB = 0.1;

  /** The probability of skipping a column everywhere */
  private static final double COLUMN_SKIP_EVERYWHERE_PROB = 0.1;

  /** The probability to delete a row/column pair */
  private static final double DELETE_PROBABILITY = 0.02;

  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  private final Compression.Algorithm comprAlgo;
  private final StoreFile.BloomType bloomType;
  private final DataBlockEncoding dataBlockEncoding;

  // Some static sanity-checking.
  static {
    assertTrue(BIG_LONG > 0.9 * Long.MAX_VALUE); // Guard against typos.

    // Ensure TIMESTAMPS are sorted; matchesQuery() relies on this ordering
    // when it picks the newest maxVersions timestamps by index.
    for (int i = 0; i < TIMESTAMPS.length - 1; ++i) {
      assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]);
    }
  }

  /**
   * Builds the parameter list for the parameterized runner: every entry of
   * {@link HBaseTestingUtility#BLOOM_AND_COMPRESSION_COMBINATIONS} is emitted
   * twice, once with data block encoding disabled and once enabled.
   *
   * @return constructor argument arrays, each with the data block encoding
   *         flag appended as the last element
   */
  @Parameters
  public static final Collection<Object[]> parameters() {
    List<Object[]> parameters = new ArrayList<Object[]>();
    for (Object[] bloomAndCompressionParams :
        HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS) {
      for (boolean useDataBlockEncoding : new boolean[]{false, true}) {
        parameters.add(ArrayUtils.add(bloomAndCompressionParams,
            useDataBlockEncoding));
      }
    }
    return parameters;
  }

  /**
   * @param comprAlgo compression algorithm for the test column family
   * @param bloomType Bloom filter type for the test column family
   * @param useDataBlockEncoding if true the family uses
   *          {@link DataBlockEncoding#PREFIX}, otherwise
   *          {@link DataBlockEncoding#NONE}
   */
  public TestMultiColumnScanner(Compression.Algorithm comprAlgo,
      StoreFile.BloomType bloomType, boolean useDataBlockEncoding) {
    this.comprAlgo = comprAlgo;
    this.bloomType = bloomType;
    this.dataBlockEncoding = useDataBlockEncoding ? DataBlockEncoding.PREFIX :
        DataBlockEncoding.NONE;
  }

  /**
   * Populates a region over {@link #NUM_FLUSHES} flushes and verifies scans
   * for every non-empty column subset and every max-versions value from 1 to
   * the number of test timestamps.
   *
   * @throws IOException if a region or scanner operation fails
   */
  @Test
  public void testMultiColumnScanner() throws IOException {
    HRegion region = TEST_UTIL.createTestRegion(TABLE_NAME,
        new HColumnDescriptor(FAMILY)
            .setCompressionType(comprAlgo)
            .setBloomFilterType(bloomType)
            .setMaxVersions(MAX_VERSIONS)
            .setDataBlockEncoding(dataBlockEncoding)
    );
    // Close the region even when an assertion below fails.
    try {
      List<String> rows = sequentialStrings("row", NUM_ROWS);
      List<String> qualifiers = sequentialStrings("qual", NUM_COLUMNS);

      // Every distinct key/value written, used as the expected data set.
      List<KeyValue> kvs = new ArrayList<KeyValue>();
      Set<String> keySet = new HashSet<String>();

      // A map from <row>_<qualifier> to the most recent delete timestamp for
      // that column.
      Map<String, Long> lastDelTimeMap = new HashMap<String, Long>();

      // Fixed seed so that every run generates the same data.
      Random rand = new Random(29372937L);
      Set<String> rowQualSkip = new HashSet<String>();

      // Skip some columns in some rows. We need to test scanning over a set
      // of columns when some of the columns are not there.
      for (String row : rows) {
        for (String qual : qualifiers) {
          if (rand.nextDouble() < COLUMN_SKIP_IN_ROW_PROB) {
            LOG.info("Skipping " + qual + " in row " + row);
            rowQualSkip.add(rowQualKey(row, qual));
          }
        }
      }

      // Also skip some columns in all rows.
      for (String qual : qualifiers) {
        if (rand.nextDouble() < COLUMN_SKIP_EVERYWHERE_PROB) {
          LOG.info("Skipping " + qual + " in all rows");
          for (String row : rows) {
            rowQualSkip.add(rowQualKey(row, qual));
          }
        }
      }

      for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) {
        for (String qual : qualifiers) {
          // This is where we decide to include or not include this column into
          // this store file, regardless of row and timestamp.
          if (rand.nextDouble() < COLUMN_SKIP_IN_STORE_FILE_PROB) {
            continue;
          }

          byte[] qualBytes = Bytes.toBytes(qual);
          for (String row : rows) {
            // Honor the skip decisions made above; without this check the
            // "missing column" scenario would never actually be produced.
            if (rowQualSkip.contains(rowQualKey(row, qual))) {
              continue;
            }

            Put p = new Put(Bytes.toBytes(row));
            for (long ts : TIMESTAMPS) {
              String value = createValue(row, qual, ts);
              KeyValue kv = KeyValueTestUtil.create(row, FAMILY, qual, ts,
                  value);
              assertEquals(ts, kv.getTimestamp());
              p.add(kv);
              // The same cell may be written in several flushes; record it
              // only once in the expected data set.
              if (keySet.add(kv.toString())) {
                kvs.add(kv);
              }
            }
            region.put(p);

            Delete d = new Delete(Bytes.toBytes(row));
            boolean deletedSomething = false;
            for (long ts : TIMESTAMPS) {
              if (rand.nextDouble() < DELETE_PROBABILITY) {
                d.deleteColumns(FAMILY_BYTES, qualBytes, ts);
                String rowAndQual = rowQualKey(row, qual);
                Long whenDeleted = lastDelTimeMap.get(rowAndQual);
                lastDelTimeMap.put(rowAndQual, whenDeleted == null ? ts
                    : Math.max(ts, whenDeleted));
                deletedSomething = true;
              }
            }
            if (deletedSomething) {
              region.delete(d, null, true);
            }
          }
        }
        region.flushcache();
      }

      Collections.sort(kvs, KeyValue.COMPARATOR);
      for (int maxVersions = 1; maxVersions <= TIMESTAMPS.length;
          ++maxVersions) {
        for (int columnBitMask = 1; columnBitMask <= MAX_COLUMN_BIT_MASK;
            ++columnBitMask) {
          verifyScan(region, kvs, qualifiers, lastDelTimeMap, maxVersions,
              columnBitMask);
        }
      }
      assertTrue("This test is supposed to delete at least some row/column " +
          "pairs", lastDelTimeMap.size() > 0);
      LOG.info("Number of row/col pairs deleted at least once: " +
         lastDelTimeMap.size());
    } finally {
      HRegion.closeHRegion(region);
    }
  }

  /**
   * Runs one scan and checks that it returns exactly the expected cells, in
   * the order of {@code kvs}.
   *
   * @param region the region to scan
   * @param kvs all distinct cells written, sorted with
   *          {@link KeyValue#COMPARATOR}
   * @param qualifiers all column qualifiers; bit {@code i} of
   *          {@code columnBitMask} selects {@code qualifiers.get(i)}
   * @param lastDelTimeMap latest delete timestamp per row/qualifier key
   * @param maxVersions the max-versions setting for the scan
   * @param columnBitMask selects which qualifiers are added to the scan
   * @throws IOException if the scanner fails
   */
  private static void verifyScan(HRegion region, List<KeyValue> kvs,
      List<String> qualifiers, Map<String, Long> lastDelTimeMap,
      int maxVersions, int columnBitMask) throws IOException {
    Scan scan = new Scan();
    scan.setMaxVersions(maxVersions);
    Set<String> qualSet = new TreeSet<String>();
    int columnMaskTmp = columnBitMask;
    for (String qual : qualifiers) {
      if ((columnMaskTmp & 1) != 0) {
        scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qual));
        qualSet.add(qual);
      }
      columnMaskTmp >>= 1;
    }
    // Every bit of the mask must correspond to a qualifier.
    assertEquals(0, columnMaskTmp);

    String queryInfo = "columns queried: " + qualSet + " (columnBitMask="
        + columnBitMask + "), maxVersions=" + maxVersions;

    List<KeyValue> results = new ArrayList<KeyValue>();
    int kvPos = 0;
    int numResults = 0;

    InternalScanner scanner = region.getScanner(scan);
    try {
      while (scanner.next(results) || results.size() > 0) {
        for (KeyValue kv : results) {
          // Advance to the next expected cell that this query should return.
          while (kvPos < kvs.size()
              && !matchesQuery(kvs.get(kvPos), qualSet, maxVersions,
                  lastDelTimeMap)) {
            ++kvPos;
          }
          String rowQual = getRowQualStr(kv);
          String deleteInfo = "";
          Long lastDelTS = lastDelTimeMap.get(rowQual);
          if (lastDelTS != null) {
            deleteInfo = "; last timestamp when row/column " + rowQual
                + " was deleted: " + lastDelTS;
          }
          assertTrue("Scanner returned additional key/value: " + kv + ", "
              + queryInfo + deleteInfo + ";", kvPos < kvs.size());
          assertEquals("Scanner returned wrong key/value; " + queryInfo
              + deleteInfo + ";", kvs.get(kvPos), kv);
          ++kvPos;
          ++numResults;
        }
        results.clear();
      }
    } finally {
      // Release the scanner even if one of the assertions above failed.
      scanner.close();
    }

    // Nothing left in the expected data set may match the query.
    for (; kvPos < kvs.size(); ++kvPos) {
      KeyValue remainingKV = kvs.get(kvPos);
      assertFalse("Matching column not returned by scanner: "
          + remainingKV + ", " + queryInfo + ", results returned: "
          + numResults, matchesQuery(remainingKV, qualSet, maxVersions,
          lastDelTimeMap));
    }
  }

  /**
   * @param kv a key/value
   * @return the {@code <row>_<qualifier>} key of the given key/value, in the
   *         same format as {@link #rowQualKey(String, String)}
   */
  private static String getRowQualStr(KeyValue kv) {
    String rowStr = Bytes.toString(kv.getBuffer(), kv.getRowOffset(),
        kv.getRowLength());
    return rowQualKey(rowStr, qualStr(kv));
  }

  /**
   * Decides whether the expected-results model says a cell should be returned
   * by a scan. A cell matches when its qualifier was requested, its timestamp
   * is one of the {@code maxVersions} largest entries of {@link #TIMESTAMPS},
   * and it is strictly newer than the latest delete recorded for its
   * row/qualifier (if any).
   *
   * @param kv the candidate cell
   * @param qualSet qualifiers requested by the scan
   * @param maxVersions max-versions setting of the scan, between 1 and
   *          {@code TIMESTAMPS.length}
   * @param lastDelTimeMap latest delete timestamp per row/qualifier key
   * @return true if the cell is expected in the scan output
   */
  private static boolean matchesQuery(KeyValue kv, Set<String> qualSet,
      int maxVersions, Map<String, Long> lastDelTimeMap) {
    Long lastDelTS = lastDelTimeMap.get(getRowQualStr(kv));
    long ts = kv.getTimestamp();
    return qualSet.contains(qualStr(kv))
        && ts >= TIMESTAMPS[TIMESTAMPS.length - maxVersions]
        && (lastDelTS == null || ts > lastDelTS);
  }

  /**
   * @param kv a key/value
   * @return the column qualifier of the given key/value as a string
   */
  private static String qualStr(KeyValue kv) {
    return Bytes.toString(kv.getBuffer(), kv.getQualifierOffset(),
        kv.getQualifierLength());
  }

  /**
   * @param row row name
   * @param qual column qualifier
   * @return the {@code <row>_<qualifier>} key used by the skip set and the
   *         delete-timestamp map
   */
  private static String rowQualKey(String row, String qual) {
    return row + "_" + qual;
  }

  /**
   * @param row row name
   * @param qual column qualifier
   * @param ts cell timestamp
   * @return the value string stored for the given row, qualifier and timestamp
   */
  static String createValue(String row, String qual, long ts) {
    return "value_for_" + row + "_" + qual + "_" + ts;
  }

  /**
   * Generates {@code n} strings of the form {@code prefix + i} followed by a
   * suffix of 'a'/'b' characters, one per binary digit of {@code i} (lowest
   * bit first), so that the generated strings have varying lengths.
   *
   * @param prefix common prefix of the generated strings
   * @param n number of strings to generate
   * @return the generated strings, for {@code i} from 0 to {@code n - 1}
   */
  private static List<String> sequentialStrings(String prefix, int n) {
    List<String> lst = new ArrayList<String>();
    for (int i = 0; i < n; ++i) {
      StringBuilder sb = new StringBuilder();
      sb.append(prefix).append(i);

      // Make column length depend on i.
      int iBitShifted = i;
      while (iBitShifted != 0) {
        sb.append((iBitShifted & 1) == 0 ? 'a' : 'b');
        iBitShifted >>= 1;
      }

      lst.add(sb.toString());
    }

    return lst;
  }

}