/*
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import org.apache.hadoop.io.Writable;

import java.nio.ByteBuffer;

/**
 * Defines the general behavior of a bloom filter.
 * <p>
 * The Bloom filter is a data structure that was introduced in 1970 and that
 * has been adopted by the networking research community in the past decade
 * thanks to the bandwidth efficiencies that it offers for the transmission of
 * set membership information between networked hosts. A sender encodes the
 * information into a bit vector, the Bloom filter, that is more compact than a
 * conventional representation. Computation and space costs for construction
 * are linear in the number of elements. The receiver uses the filter to test
 * whether various elements are members of the set. Though the filter will
 * occasionally return a false positive, it will never return a false
 * negative. When creating the filter, the sender can choose its desired point
 * in a trade-off between the false positive rate and the size.
 *
 * <p>
 * Originally created by
 * <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
 *
 * <p>
 * It must be extended in order to define the real behavior.
 */
public interface BloomFilter {

  /**
   * Allocate memory for the bloom filter data. Note that bloom data isn't
   * allocated by default because it can grow large &amp; reads would be better
   * managed by the LRU cache.
   */
  void allocBloom();

  /**
   * Add the specified binary to the bloom filter.
   *
   * @param buf data to be added to the bloom
   */
  void add(byte[] buf);

  /**
   * Add the specified binary to the bloom filter.
   *
   * @param buf data to be added to the bloom
   * @param offset offset into the data to be added
   * @param len length of the data to be added
   */
  void add(byte[] buf, int offset, int len);

  /**
   * Check if the specified key is contained in the bloom filter.
   *
   * @param buf data to check for existence of
   * @param bloom bloom filter data to search
   * @return true if matched by bloom, false if not
   */
  boolean contains(byte[] buf, ByteBuffer bloom);

  /**
   * Check if the specified key is contained in the bloom filter.
   *
   * @param buf data to check for existence of
   * @param offset offset into the data
   * @param length length of the data
   * @param bloom bloom filter data to search
   * @return true if matched by bloom, false if not
   */
  boolean contains(byte[] buf, int offset, int length, ByteBuffer bloom);

  /**
   * @return The number of keys added to the bloom
   */
  int getKeyCount();

  /**
   * @return The max number of keys that can be inserted
   *         to maintain the desired error rate
   */
  int getMaxKeys();

  /**
   * @return Size of the bloom, in bytes
   */
  int getByteSize();

  /**
   * Compact the bloom before writing metadata &amp; data to disk.
   */
  void compactBloom();

  /**
   * Get a writable interface into bloom filter meta data.
   *
   * @return writable class
   */
  Writable getMetaWriter();

  /**
   * Get a writable interface into bloom filter data (actual bloom).
   *
   * @return writable class
   */
  Writable getDataWriter();
}