/* * Copyright 2010 The Apache Software Foundation * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.util; import org.apache.hadoop.io.Writable; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.nio.ByteBuffer; /** * Implements a <i>dynamic Bloom filter</i>, as defined in the INFOCOM 2006 paper. * <p> * A dynamic Bloom filter (DBF) makes use of a <code>s * m</code> bit matrix but * each of the <code>s</code> rows is a standard Bloom filter. The creation * process of a DBF is iterative. At the start, the DBF is a <code>1 * m</code> * bit matrix, i.e., it is composed of a single standard Bloom filter. * It assumes that <code>n<sub>r</sub></code> elements are recorded in the * initial bit vector, where <code>n<sub>r</sub> <= n</code> (<code>n</code> is * the cardinality of the set <code>A</code> to record in the filter). * <p> * As the size of <code>A</code> grows during the execution of the application, * several keys must be inserted in the DBF. When inserting a key into the DBF, * one must first get an active Bloom filter in the matrix. A Bloom filter is * active when the number of recorded keys, <code>n<sub>r</sub></code>, is * strictly less than the current cardinality of <code>A</code>, <code>n</code>. * If an active Bloom filter is found, the key is inserted and * <code>n<sub>r</sub></code> is incremented by one. On the other hand, if there * is no active Bloom filter, a new one is created (i.e., a new row is added to * the matrix) according to the current size of <code>A</code> and the element * is added in this new Bloom filter and the <code>n<sub>r</sub></code> value of * this new Bloom filter is set to one. A given key is said to belong to the * DBF if the <code>k</code> positions are set to one in one of the matrix rows. * <p> * Originally created by * <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>. * * @see BloomFilter A Bloom filter * * @see <a href="http://www.cse.fau.edu/~jie/research/publications/Publication_files/infocom2006.pdf">Theory and Network Applications of Dynamic Bloom Filters</a> */ public class DynamicByteBloomFilter implements BloomFilter { /** Current file format version */ public static final int VERSION = 2; /** Maximum number of keys in a dynamic Bloom filter row. */ protected final int keyInterval; /** The maximum false positive rate per bloom */ protected final float errorRate; /** Hash type */ protected final int hashType; /** The number of keys recorded in the current Bloom filter. */ protected int curKeys; /** expected size of bloom filter matrix (used during reads) */ protected int readMatrixSize; /** The matrix of Bloom filters (contains bloom data only during writes). */ protected ByteBloomFilter[] matrix; /** * Normal read constructor. Loads bloom filter meta data. * @param meta stored bloom meta data * @throws IllegalArgumentException meta data is invalid */ public DynamicByteBloomFilter(ByteBuffer meta) throws IllegalArgumentException { int version = meta.getInt(); if (version != VERSION) throw new IllegalArgumentException("Bad version"); this.keyInterval = meta.getInt(); this.errorRate = meta.getFloat(); this.hashType = meta.getInt(); this.readMatrixSize = meta.getInt(); this.curKeys = meta.getInt(); readSanityCheck(); this.matrix = new ByteBloomFilter[1]; this.matrix[0] = new ByteBloomFilter(keyInterval, errorRate, hashType, 0); } /** * Normal write constructor. Note that this doesn't allocate bloom data by * default. Instead, call allocBloom() before adding entries. * @param errorRate * @param hashType type of the hashing function (see <code>org.apache.hadoop.util.hash.Hash</code>). * @param keyInterval Maximum number of keys to record per Bloom filter row. * @throws IllegalArgumentException The input parameters were invalid */ public DynamicByteBloomFilter(int keyInterval, float errorRate, int hashType) throws IllegalArgumentException { this.keyInterval = keyInterval; this.errorRate = errorRate; this.hashType = hashType; this.curKeys = 0; if(keyInterval <= 0) { throw new IllegalArgumentException("keyCount must be > 0"); } this.matrix = new ByteBloomFilter[1]; this.matrix[0] = new ByteBloomFilter(keyInterval, errorRate, hashType, 0); } @Override public void allocBloom() { this.matrix[0].allocBloom(); } void readSanityCheck() throws IllegalArgumentException { if (this.curKeys <= 0) { throw new IllegalArgumentException("last bloom's key count invalid"); } if (this.readMatrixSize <= 0) { throw new IllegalArgumentException("matrix size must be known"); } } @Override public void add(byte []buf, int offset, int len) { BloomFilter bf = getCurBloom(); if (bf == null) { addRow(); bf = matrix[matrix.length - 1]; curKeys = 0; } bf.add(buf, offset, len); curKeys++; } @Override public void add(byte []buf) { add(buf, 0, buf.length); } /** * Should only be used in tests when writing a bloom filter. */ boolean contains(byte [] buf) { return contains(buf, 0, buf.length); } /** * Should only be used in tests when writing a bloom filter. */ boolean contains(byte [] buf, int offset, int length) { for (int i = 0; i < matrix.length; i++) { if (matrix[i].contains(buf, offset, length)) { return true; } } return false; } @Override public boolean contains(byte [] buf, ByteBuffer theBloom) { return contains(buf, 0, buf.length, theBloom); } @Override public boolean contains(byte[] buf, int offset, int length, ByteBuffer theBloom) { if(offset + length > buf.length) { return false; } // current version assumes uniform size int bytesPerBloom = this.matrix[0].getByteSize(); if(theBloom.limit() != bytesPerBloom * readMatrixSize) { throw new IllegalArgumentException("Bloom does not match expected size"); } ByteBuffer tmp = theBloom.duplicate(); // note: actually searching an array of blooms that have been serialized for (int m = 0; m < readMatrixSize; ++m) { tmp.position(m* bytesPerBloom); tmp.limit(tmp.position() + bytesPerBloom); boolean match = this.matrix[0].contains(buf, offset, length, tmp.slice()); if (match) { return true; } } // matched no bloom filters return false; } int bloomCount() { return Math.max(this.matrix.length, this.readMatrixSize); } @Override public int getKeyCount() { return (bloomCount()-1) * this.keyInterval + this.curKeys; } @Override public int getMaxKeys() { return bloomCount() * this.keyInterval; } @Override public int getByteSize() { return bloomCount() * this.matrix[0].getByteSize(); } @Override public void compactBloom() { } /** * Adds a new row to <i>this</i> dynamic Bloom filter. */ private void addRow() { ByteBloomFilter[] tmp = new ByteBloomFilter[matrix.length + 1]; for (int i = 0; i < matrix.length; i++) { tmp[i] = matrix[i]; } tmp[tmp.length-1] = new ByteBloomFilter(keyInterval, errorRate, hashType, 0); tmp[tmp.length-1].allocBloom(); matrix = tmp; } /** * Returns the currently-unfilled row in the dynamic Bloom Filter array. * @return BloomFilter The active standard Bloom filter. * <code>Null</code> otherwise. */ private BloomFilter getCurBloom() { if (curKeys >= keyInterval) { return null; } return matrix[matrix.length - 1]; } @Override public Writable getMetaWriter() { return new MetaWriter(); } @Override public Writable getDataWriter() { return new DataWriter(); } private class MetaWriter implements Writable { protected MetaWriter() {} @Override public void readFields(DataInput arg0) throws IOException { throw new IOException("Cant read with this class."); } @Override public void write(DataOutput out) throws IOException { out.writeInt(VERSION); out.writeInt(keyInterval); out.writeFloat(errorRate); out.writeInt(hashType); out.writeInt(matrix.length); out.writeInt(curKeys); } } private class DataWriter implements Writable { protected DataWriter() {} @Override public void readFields(DataInput arg0) throws IOException { throw new IOException("Cant read with this class."); } @Override public void write(DataOutput out) throws IOException { for (int i = 0; i < matrix.length; ++i) { matrix[i].writeBloom(out); } } } }