TestFilterList.java example

Explorer
pbase-master
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertNull;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import com.google.common.collect.Lists;

/**
 * Tests filter sets
 *
 */
@Category(SmallTests.class)
public class TestFilterList {
  static final int MAX_PAGES = 2;
  static final char FIRST_CHAR = 'a';
  static final char LAST_CHAR = 'e';
  static byte[] GOOD_BYTES = Bytes.toBytes("abc");
  static byte[] BAD_BYTES = Bytes.toBytes("def");


  @Test
  public void testAddFilter() throws Exception {
    Filter filter1 = new FirstKeyOnlyFilter();
    Filter filter2 = new FirstKeyOnlyFilter();

    FilterList filterList = new FilterList(filter1, filter2);
    filterList.addFilter(new FirstKeyOnlyFilter());

    filterList = new FilterList(Arrays.asList(filter1, filter2));
    filterList.addFilter(new FirstKeyOnlyFilter());

    filterList = new FilterList(Operator.MUST_PASS_ALL, filter1, filter2);
    filterList.addFilter(new FirstKeyOnlyFilter());

    filterList = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(filter1, filter2));
    filterList.addFilter(new FirstKeyOnlyFilter());

  }


  /**
   * Test "must pass one"
   * @throws Exception
   */
  @Test
  public void testMPONE() throws Exception {
    mpOneTest(getFilterMPONE());
  }

  private Filter getFilterMPONE() {
    List<Filter> filters = new ArrayList<Filter>();
    filters.add(new PageFilter(MAX_PAGES));
    filters.add(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("yyy"))));
    Filter filterMPONE =
      new FilterList(FilterList.Operator.MUST_PASS_ONE, filters);
    return filterMPONE;
  }

  private void mpOneTest(Filter filterMPONE) throws Exception {
    /* Filter must do all below steps:
     * <ul>
     * <li>{@link #reset()}</li>
     * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
     * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
     * if false, we will also call</li>
     * <li>{@link #filterKeyValue(org.apache.hadoop.hbase.KeyValue)} -> true to drop this key/value</li>
     * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
     * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
     * </li>
     * </ul>
    */
    filterMPONE.reset();
    assertFalse(filterMPONE.filterAllRemaining());

    /* Will pass both */
    byte [] rowkey = Bytes.toBytes("yyyyyyyyy");
    for (int i = 0; i < MAX_PAGES - 1; i++) {
      assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
      KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
        Bytes.toBytes(i));
      assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
      assertFalse(filterMPONE.filterRow());
    }

    /* Only pass PageFilter */
    rowkey = Bytes.toBytes("z");
    assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
    KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(0),
        Bytes.toBytes(0));
    assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
    assertFalse(filterMPONE.filterRow());

    /* reach MAX_PAGES already, should filter any rows */
    rowkey = Bytes.toBytes("yyy");
    assertTrue(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
    kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(0),
        Bytes.toBytes(0));
    assertFalse(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
    assertFalse(filterMPONE.filterRow());

    /* We should filter any row */
    rowkey = Bytes.toBytes("z");
    assertTrue(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
    assertTrue(filterMPONE.filterAllRemaining());
  }

  /**
   * Test "must pass all"
   * @throws Exception
   */
  @Test
  public void testMPALL() throws Exception {
    mpAllTest(getMPALLFilter());
  }

  private Filter getMPALLFilter() {
    List<Filter> filters = new ArrayList<Filter>();
    filters.add(new PageFilter(MAX_PAGES));
    filters.add(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("yyy"))));
    Filter filterMPALL =
      new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);
    return filterMPALL;
  }

  private void mpAllTest(Filter filterMPALL) throws Exception {
    /* Filter must do all below steps:
     * <ul>
     * <li>{@link #reset()}</li>
     * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
     * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
     * if false, we will also call</li>
     * <li>{@link #filterKeyValue(org.apache.hadoop.hbase.KeyValue)} -> true to drop this key/value</li>
     * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
     * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
     * </li>
     * </ul>
    */
    filterMPALL.reset();
    assertFalse(filterMPALL.filterAllRemaining());
    byte [] rowkey = Bytes.toBytes("yyyyyyyyy");
    for (int i = 0; i < MAX_PAGES - 1; i++) {
      assertFalse(filterMPALL.filterRowKey(rowkey, 0, rowkey.length));
      KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
        Bytes.toBytes(i));
      assertTrue(Filter.ReturnCode.INCLUDE == filterMPALL.filterKeyValue(kv));
    }
    filterMPALL.reset();
    rowkey = Bytes.toBytes("z");
    assertTrue(filterMPALL.filterRowKey(rowkey, 0, rowkey.length));
    // Should fail here; row should be filtered out.
    KeyValue kv = new KeyValue(rowkey, rowkey, rowkey, rowkey);
    assertTrue(Filter.ReturnCode.NEXT_ROW == filterMPALL.filterKeyValue(kv));
  }

  /**
   * Test list ordering
   * @throws Exception
   */
  @Test
  public void testOrdering() throws Exception {
    orderingTest(getOrderingFilter());
  }

  public Filter getOrderingFilter() {
    List<Filter> filters = new ArrayList<Filter>();
    filters.add(new PrefixFilter(Bytes.toBytes("yyy")));
    filters.add(new PageFilter(MAX_PAGES));
    Filter filterMPONE =
      new FilterList(FilterList.Operator.MUST_PASS_ONE, filters);
    return filterMPONE;
  }

  public void orderingTest(Filter filterMPONE) throws Exception {
    /* Filter must do all below steps:
     * <ul>
     * <li>{@link #reset()}</li>
     * <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
     * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
     * if false, we will also call</li>
     * <li>{@link #filterKeyValue(org.apache.hadoop.hbase.KeyValue)} -> true to drop this key/value</li>
     * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
     * filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
     * </li>
     * </ul>
    */
    filterMPONE.reset();
    assertFalse(filterMPONE.filterAllRemaining());

    /* We should be able to fill MAX_PAGES without incrementing page counter */
    byte [] rowkey = Bytes.toBytes("yyyyyyyy");
    for (int i = 0; i < MAX_PAGES; i++) {
      assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
      KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
          Bytes.toBytes(i));
        assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
      assertFalse(filterMPONE.filterRow());
    }

    /* Now let's fill the page filter */
    rowkey = Bytes.toBytes("xxxxxxx");
    for (int i = 0; i < MAX_PAGES; i++) {
      assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
      KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
          Bytes.toBytes(i));
        assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
      assertFalse(filterMPONE.filterRow());
    }

    /* We should still be able to include even though page filter is at max */
    rowkey = Bytes.toBytes("yyy");
    for (int i = 0; i < MAX_PAGES; i++) {
      assertFalse(filterMPONE.filterRowKey(rowkey, 0, rowkey.length));
      KeyValue kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(i),
          Bytes.toBytes(i));
        assertTrue(Filter.ReturnCode.INCLUDE == filterMPONE.filterKeyValue(kv));
      assertFalse(filterMPONE.filterRow());
    }
  }

  /**
   * When we do a "MUST_PASS_ONE" (a logical 'OR') of the above two filters
   * we expect to get the same result as the 'prefix' only result.
   * @throws Exception
   */
  public void testFilterListTwoFiltersMustPassOne() throws Exception {
    byte[] r1 = Bytes.toBytes("Row1");
    byte[] r11 = Bytes.toBytes("Row11");
    byte[] r2 = Bytes.toBytes("Row2");
  
    FilterList flist = new FilterList(FilterList.Operator.MUST_PASS_ONE);
    flist.addFilter(new PrefixFilter(r1));
    flist.filterRowKey(r1, 0, r1.length);
    assertEquals(flist.filterKeyValue(new KeyValue(r1,r1,r1)), ReturnCode.INCLUDE);
    assertEquals(flist.filterKeyValue(new KeyValue(r11,r11,r11)), ReturnCode.INCLUDE);

    flist.reset();
    flist.filterRowKey(r2, 0, r2.length);
    assertEquals(flist.filterKeyValue(new KeyValue(r2,r2,r2)), ReturnCode.SKIP);
  
    flist = new FilterList(FilterList.Operator.MUST_PASS_ONE);
    flist.addFilter(new AlwaysNextColFilter());
    flist.addFilter(new PrefixFilter(r1));
    flist.filterRowKey(r1, 0, r1.length);
    assertEquals(flist.filterKeyValue(new KeyValue(r1,r1,r1)), ReturnCode.INCLUDE);
    assertEquals(flist.filterKeyValue(new KeyValue(r11,r11,r11)), ReturnCode.INCLUDE);

    flist.reset();
    flist.filterRowKey(r2, 0, r2.length);
    assertEquals(flist.filterKeyValue(new KeyValue(r2,r2,r2)), ReturnCode.SKIP);
  }

  /**
   * When we do a "MUST_PASS_ONE" (a logical 'OR') of the two filters
   * we expect to get the same result as the inclusive stop result.
   * @throws Exception
   */
  public void testFilterListWithInclusiveStopFilteMustPassOne() throws Exception {
    byte[] r1 = Bytes.toBytes("Row1");
    byte[] r11 = Bytes.toBytes("Row11");
    byte[] r2 = Bytes.toBytes("Row2");
  
    FilterList flist = new FilterList(FilterList.Operator.MUST_PASS_ONE);
    flist.addFilter(new AlwaysNextColFilter());
    flist.addFilter(new InclusiveStopFilter(r1));
    flist.filterRowKey(r1, 0, r1.length);
    assertEquals(flist.filterKeyValue(new KeyValue(r1,r1,r1)), ReturnCode.INCLUDE);
    assertEquals(flist.filterKeyValue(new KeyValue(r11,r11,r11)), ReturnCode.INCLUDE);

    flist.reset();
    flist.filterRowKey(r2, 0, r2.length);
    assertEquals(flist.filterKeyValue(new KeyValue(r2,r2,r2)), ReturnCode.SKIP);
  }

  public static class AlwaysNextColFilter extends FilterBase {
    public AlwaysNextColFilter() {
      super();
    }
    @Override
    public ReturnCode filterKeyValue(Cell v) {
      return ReturnCode.NEXT_COL;
    }
    public static AlwaysNextColFilter parseFrom(final byte [] pbBytes)
        throws DeserializationException {
      return new AlwaysNextColFilter();
    }
  }

  /**
   * Test serialization
   * @throws Exception
   */
  @Test
  public void testSerialization() throws Exception {
    List<Filter> filters = new ArrayList<Filter>();
    filters.add(new PageFilter(MAX_PAGES));
    filters.add(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("yyy"))));
    Filter filterMPALL =
      new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);

    // Decompose filterMPALL to bytes.
    byte[] buffer = filterMPALL.toByteArray();

    // Recompose filterMPALL.
    FilterList newFilter = FilterList.parseFrom(buffer);

    // Run tests
    mpOneTest(ProtobufUtil.toFilter(ProtobufUtil.toFilter(getFilterMPONE())));
    mpAllTest(ProtobufUtil.toFilter(ProtobufUtil.toFilter(getMPALLFilter())));
    orderingTest(ProtobufUtil.toFilter(ProtobufUtil.toFilter(getOrderingFilter())));
  }

  /**
   * Test filterKeyValue logic.
   * @throws Exception
   */
  public void testFilterKeyValue() throws Exception {
    Filter includeFilter = new FilterBase() {
      @Override
      public Filter.ReturnCode filterKeyValue(Cell v) {
        return Filter.ReturnCode.INCLUDE;
      }
    };

    Filter alternateFilter = new FilterBase() {
      boolean returnInclude = true;

      @Override
      public Filter.ReturnCode filterKeyValue(Cell v) {
        Filter.ReturnCode returnCode = returnInclude ? Filter.ReturnCode.INCLUDE :
                                                       Filter.ReturnCode.SKIP;
        returnInclude = !returnInclude;
        return returnCode;
      }
    };

    Filter alternateIncludeFilter = new FilterBase() {
      boolean returnIncludeOnly = false;

      @Override
      public Filter.ReturnCode filterKeyValue(Cell v) {
        Filter.ReturnCode returnCode = returnIncludeOnly ? Filter.ReturnCode.INCLUDE :
                                                           Filter.ReturnCode.INCLUDE_AND_NEXT_COL;
        returnIncludeOnly = !returnIncludeOnly;
        return returnCode;
      }
    };

    // Check must pass one filter.
    FilterList mpOnefilterList = new FilterList(Operator.MUST_PASS_ONE,
        Arrays.asList(new Filter[] { includeFilter, alternateIncludeFilter, alternateFilter }));
    // INCLUDE, INCLUDE, INCLUDE_AND_NEXT_COL.
    assertEquals(Filter.ReturnCode.INCLUDE_AND_NEXT_COL, mpOnefilterList.filterKeyValue(null));
    // INCLUDE, SKIP, INCLUDE. 
    assertEquals(Filter.ReturnCode.INCLUDE, mpOnefilterList.filterKeyValue(null));

    // Check must pass all filter.
    FilterList mpAllfilterList = new FilterList(Operator.MUST_PASS_ALL,
        Arrays.asList(new Filter[] { includeFilter, alternateIncludeFilter, alternateFilter }));
    // INCLUDE, INCLUDE, INCLUDE_AND_NEXT_COL.
    assertEquals(Filter.ReturnCode.INCLUDE_AND_NEXT_COL, mpAllfilterList.filterKeyValue(null));
    // INCLUDE, SKIP, INCLUDE. 
    assertEquals(Filter.ReturnCode.SKIP, mpAllfilterList.filterKeyValue(null));
  }

  /**
   * Test pass-thru of hints.
   */
  @Test
  public void testHintPassThru() throws Exception {

    final KeyValue minKeyValue = new KeyValue(Bytes.toBytes(0L), null, null);
    final KeyValue maxKeyValue = new KeyValue(Bytes.toBytes(Long.MAX_VALUE),
        null, null);

    Filter filterNoHint = new FilterBase() {
      @Override
      public byte [] toByteArray() {
        return null;
      }
      
      @Override
      public ReturnCode filterKeyValue(Cell ignored) throws IOException {
        return ReturnCode.INCLUDE;
      }
    };

    Filter filterMinHint = new FilterBase() {
      @Override
      public ReturnCode filterKeyValue(Cell ignored) {
        return ReturnCode.SEEK_NEXT_USING_HINT;
      }

      @Override
      public Cell getNextCellHint(Cell currentKV) {
        return minKeyValue;
      }

      @Override
      public byte [] toByteArray() {return null;}
    };

    Filter filterMaxHint = new FilterBase() {
      @Override
      public ReturnCode filterKeyValue(Cell ignored) {
        return ReturnCode.SEEK_NEXT_USING_HINT;
      }

      @Override
      public Cell getNextCellHint(Cell currentKV) {
        return new KeyValue(Bytes.toBytes(Long.MAX_VALUE), null, null);
      }

      @Override
      public byte [] toByteArray() {return null;}
    };

    // MUST PASS ONE

    // Should take the min if given two hints
    FilterList filterList = new FilterList(Operator.MUST_PASS_ONE,
        Arrays.asList(new Filter [] { filterMinHint, filterMaxHint } ));
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        minKeyValue));

    // Should have no hint if any filter has no hint
    filterList = new FilterList(Operator.MUST_PASS_ONE,
        Arrays.asList(
            new Filter [] { filterMinHint, filterMaxHint, filterNoHint } ));
    assertNull(filterList.getNextKeyHint(null));
    filterList = new FilterList(Operator.MUST_PASS_ONE,
        Arrays.asList(new Filter [] { filterNoHint, filterMaxHint } ));
    assertNull(filterList.getNextKeyHint(null));

    // Should give max hint if its the only one
    filterList = new FilterList(Operator.MUST_PASS_ONE,
        Arrays.asList(new Filter [] { filterMaxHint, filterMaxHint } ));
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        maxKeyValue));

    // MUST PASS ALL

    // Should take the first hint
    filterList = new FilterList(Operator.MUST_PASS_ALL,
        Arrays.asList(new Filter [] { filterMinHint, filterMaxHint } ));
    filterList.filterKeyValue(null);
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        minKeyValue));

    filterList = new FilterList(Operator.MUST_PASS_ALL,
        Arrays.asList(new Filter [] { filterMaxHint, filterMinHint } ));
    filterList.filterKeyValue(null);
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        maxKeyValue));

    // Should have first hint even if a filter has no hint
    filterList = new FilterList(Operator.MUST_PASS_ALL,
        Arrays.asList(
            new Filter [] { filterNoHint, filterMinHint, filterMaxHint } ));
    filterList.filterKeyValue(null);
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        minKeyValue));
    filterList = new FilterList(Operator.MUST_PASS_ALL,
        Arrays.asList(new Filter [] { filterNoHint, filterMaxHint } ));
    filterList.filterKeyValue(null);
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        maxKeyValue));
    filterList = new FilterList(Operator.MUST_PASS_ALL,
        Arrays.asList(new Filter [] { filterNoHint, filterMinHint } ));
    filterList.filterKeyValue(null);
    assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null),
        minKeyValue));
  }

  /**
   * Tests the behavior of transform() in a hierarchical filter.
   *
   * transform() only applies after a filterKeyValue() whose return-code includes the KeyValue.
   * Lazy evaluation of AND
   */
  @Test
  public void testTransformMPO() throws Exception {
    // Apply the following filter:
    //     (family=fam AND qualifier=qual1 AND KeyOnlyFilter)
    //  OR (family=fam AND qualifier=qual2)
    final FilterList flist = new FilterList(Operator.MUST_PASS_ONE, Lists.<Filter>newArrayList(
        new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
            new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
            new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual1"))),
            new KeyOnlyFilter())),
        new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
            new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
            new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual2")))))));

    final KeyValue kvQual1 = new KeyValue(
        Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual1"), Bytes.toBytes("value"));
    final KeyValue kvQual2 = new KeyValue(
        Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual2"), Bytes.toBytes("value"));
    final KeyValue kvQual3 = new KeyValue(
        Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual3"), Bytes.toBytes("value"));

    // Value for fam:qual1 should be stripped:
    assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual1));
    final KeyValue transformedQual1 = KeyValueUtil.ensureKeyValue(flist.transform(kvQual1));
    assertEquals(0, transformedQual1.getValue().length);

    // Value for fam:qual2 should not be stripped:
    assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual2));
    final KeyValue transformedQual2 = KeyValueUtil.ensureKeyValue(flist.transform(kvQual2));
    assertEquals("value", Bytes.toString(transformedQual2.getValue()));

    // Other keys should be skipped:
    assertEquals(Filter.ReturnCode.SKIP, flist.filterKeyValue(kvQual3));
  }

}