BloomFilter.java example

Explorer
hadoop-hbase-master
- src
/*
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import org.apache.hadoop.io.Writable;

import java.nio.ByteBuffer;

/**
 * Defines the general behavior of a bloom filter.
 *
 * <p>
 * The Bloom filter is a data structure that was introduced in 1970 and that
 * has been adopted by the networking research community in the past decade
 * thanks to the bandwidth efficiencies that it offers for the transmission of
 * set membership information between networked hosts. A sender encodes the
 * information into a bit vector, the Bloom filter, that is more compact than a
 * conventional representation. Computation and space costs for construction
 * are linear in the number of elements. The receiver uses the filter to test
 * whether various elements are members of the set. Though the filter will
 * occasionally return a false positive, it will never return a false negative.
 * When creating the filter, the sender can choose its desired point in a
 * trade-off between the false positive rate and the size.
 *
 * <p>
 * Originally created by
 * <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
 *
 * <p>
 * This interface only declares the contract; it must be implemented by a
 * concrete class in order to define the real behavior.
 */
public interface BloomFilter {
  /**
   * Allocate memory for the bloom filter data. Note that bloom data isn't
   * allocated by default because it can grow large &amp; reads would be better
   * managed by the LRU cache.
   */
  void allocBloom();

  /**
   * Add the specified binary to the bloom filter.
   *
   * @param buf data to be added to the bloom
   */
  void add(byte[] buf);

  /**
   * Add a region of the specified binary to the bloom filter.
   *
   * @param buf data to be added to the bloom
   * @param offset offset into the data to be added
   * @param len length of the data to be added
   */
  void add(byte[] buf, int offset, int len);

  /**
   * Check if the specified key is contained in the bloom filter.
   *
   * @param buf data to check for existence of
   * @param bloom bloom filter data to search
   * @return true if matched by bloom (which may be a false positive),
   *         false if not
   */
  boolean contains(byte[] buf, ByteBuffer bloom);

  /**
   * Check if the specified region of a key is contained in the bloom filter.
   *
   * @param buf data to check for existence of
   * @param offset offset into the data
   * @param length length of the data
   * @param bloom bloom filter data to search
   * @return true if matched by bloom (which may be a false positive),
   *         false if not
   */
  boolean contains(byte[] buf, int offset, int length, ByteBuffer bloom);

  /**
   * Report how many keys have been added so far.
   *
   * @return The number of keys added to the bloom
   */
  int getKeyCount();

  /**
   * Report the capacity of the bloom.
   *
   * @return The max number of keys that can be inserted
   *         to maintain the desired error rate
   */
  int getMaxKeys();

  /**
   * Report the size of the bloom.
   *
   * @return Size of the bloom, in bytes
   */
  int getByteSize();

  /**
   * Compact the bloom before writing metadata &amp; data to disk.
   */
  void compactBloom();

  /**
   * Get a writable interface into bloom filter meta data.
   *
   * @return writable class
   */
  Writable getMetaWriter();

  /**
   * Get a writable interface into bloom filter data (actual bloom).
   *
   * @return writable class
   */
  Writable getDataWriter();
}