package org.xlattice.crypto.filters; import java.util.Arrays; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; /** * A Bloom filter for sets of SHA1 digests. A Bloom filter uses a set * of k hash functions to determine set membership. Each hash function * produces a value in the range 0..M-1. The filter is of size M. To * add a member to the set, apply each function to the new member and * set the corresponding bit in the filter. For M very large relative * to k, this will normally set k bits in the filter. To check whether * x is a member of the set, apply each of the k hash functions to x * and check whether the corresponding bits are set in the filter. If * any are not set, x is definitely not a member. If all are set, x * may be a member. The probability of error (the false positive rate) * is f = (1 - e^(-kN/M))^k, where N is the number of set members. * * This class takes advantage of the fact that SHA1 digests are good- * quality pseudo-random numbers. The k hash functions are the values * of distinct sets of bits taken from the 20-byte SHA1 hash. The * number of bits in the filter, M, is constrained to be a power of * 2; M == 2^m. The number of bits in each hash function may not * exceed floor(m/k). * * This class is designed to be thread-safe, but this has not been * exhaustively tested. * * @author <A HREF="mailto:jddixon@users.sourceforge.net">Jim Dixon</A> * * BloomSHA1.java and KeySelector.java are BSD licensed from the xlattice * app - http://xlattice.sourceforge.net/ * * minor tweaks by jrandom, exposing unsynchronized access and * allowing larger M and K. changes released into the public domain. * * Note that this is used only by DecayingBloomFilter, which uses only * the unsynchronized locked_foo() methods. * Deprecated for use outside of the router; to be moved to router.jar. * * As of 0.8.11, the locked_foo() methods are thread-safe, in that they work, * but there is a minor risk of false-negatives if two threads are * accessing the same bloom filter integer. */ public class BloomSHA1 { private final int m; private final int k; private int count; private final int[] filter; private final KeySelector ks; // convenience variables private final int filterBits; private final int filterWords; private final BlockingQueue<int[]> buf; /* (24,11) too big - see KeySelector public static void main(String args[]) { BloomSHA1 b = new BloomSHA1(24, 11); for (int i = 0; i < 100; i++) { byte v[] = new byte[32]; v[0] = (byte)i; b.insert(v); } } */ /** * Creates a filter with 2^m bits and k 'hash functions', where * each hash function is portion of the 160-bit SHA1 hash. * @param m determines number of bits in filter * @param k number of hash functionsx * * See KeySelector for important restriction on max m and k */ public BloomSHA1( int m, int k) { // XXX need to devise more reasonable set of checks //if ( m < 2 || m > 20) { // throw new IllegalArgumentException("m out of range"); //} //if ( k < 1 || ( k * m > 160 )) { // throw new IllegalArgumentException( // "too many hash functions for filter size"); //} this.m = m; this.k = k; filterBits = 1 << m; filterWords = (filterBits + 31)/32; // round up filter = new int[filterWords]; ks = new KeySelector(m, k); buf = new LinkedBlockingQueue<int[]>(16); // DEBUG //System.out.println("Bloom constructor: m = " + m + ", k = " + k // + "\n filterBits = " + filterBits // + ", filterWords = " + filterWords); // END } /** * Creates a filter of 2^m bits, with the number of 'hash functions" * k defaulting to 8. * @param m determines size of filter */ public BloomSHA1 (int m) { this(m, 8); } /** * Creates a filter of 2^20 bits with k defaulting to 8. */ public BloomSHA1 () { this (20, 8); } /** Clear the filter, unsynchronized */ private void doClear() { Arrays.fill(filter, 0); count = 0; } /** Synchronized version */ public void clear() { synchronized (this) { doClear(); } } /** * Returns the number of keys which have been inserted. This * class (BloomSHA1) does not guarantee uniqueness in any sense; if the * same key is added N times, the number of set members reported * will increase by N. * * @return number of set members */ public final int size() { synchronized (this) { return count; } } /** * @return number of bits in filter */ public final int capacity () { return filterBits; } /** * Add a key to the set represented by the filter. * * XXX This version does not maintain 4-bit counters, it is not * a counting Bloom filter. * * @param b byte array representing a key (SHA1 digest) */ public void insert (byte[]b) { insert(b, 0, b.length); } public void insert (byte[]b, int offset, int len) { synchronized(this) { locked_insert(b, offset, len); } } public final void locked_insert(byte[]b) { locked_insert(b, 0, b.length); } public final void locked_insert(byte[]b, int offset, int len) { int[] bitOffset = acquire(); int[] wordOffset = acquire(); ks.getOffsets(b, offset, len, bitOffset, wordOffset); for (int i = 0; i < k; i++) { filter[wordOffset[i]] |= 1 << bitOffset[i]; } count++; buf.offer(bitOffset); buf.offer(wordOffset); } /** * Is a key in the filter. Sets up the bit and word offset arrays. * * @param b byte array representing a key (SHA1 digest) * @return true if b is in the filter */ private final boolean isMember(byte[] b) { return isMember(b, 0, b.length); } private final boolean isMember(byte[] b, int offset, int len) { int[] bitOffset = acquire(); int[] wordOffset = acquire(); ks.getOffsets(b, offset, len, bitOffset, wordOffset); for (int i = 0; i < k; i++) { if (! ((filter[wordOffset[i]] & (1 << bitOffset[i])) != 0) ) { buf.offer(bitOffset); buf.offer(wordOffset); return false; } } buf.offer(bitOffset); buf.offer(wordOffset); return true; } public final boolean locked_member(byte[]b) { return isMember(b); } public final boolean locked_member(byte[]b, int offset, int len) { return isMember(b, offset, len); } /** * Is a key in the filter. External interface, internally synchronized. * * @param b byte array representing a key (SHA1 digest) * @return true if b is in the filter */ public final boolean member(byte[]b) { return member(b, 0, b.length); } public final boolean member(byte[]b, int offset, int len) { synchronized (this) { return isMember(b, offset, len); } } /** * Get the bloom filter offsets for reuse. * Caller should call release(rv) when done with it. * @since 0.8.11 */ public FilterKey getFilterKey(byte[] b, int offset, int len) { int[] bitOffset = acquire(); int[] wordOffset = acquire(); ks.getOffsets(b, offset, len, bitOffset, wordOffset); return new FilterKey(bitOffset, wordOffset); } /** * Add the key to the filter. * @since 0.8.11 */ public void locked_insert(FilterKey fk) { for (int i = 0; i < k; i++) { filter[fk.wordOffset[i]] |= 1 << fk.bitOffset[i]; } count++; } /** * Is the key in the filter. * @since 0.8.11 */ public boolean locked_member(FilterKey fk) { for (int i = 0; i < k; i++) { if (! ((filter[fk.wordOffset[i]] & (1 << fk.bitOffset[i])) != 0) ) return false; } return true; } /** * @since 0.8.11 */ private int[] acquire() { int[] rv = buf.poll(); if (rv != null) return rv; return new int[k]; } /** * @since 0.8.11 */ public void release(FilterKey fk) { buf.offer(fk.bitOffset); buf.offer(fk.wordOffset); } /** * Store the (opaque) bloom filter offsets for reuse. * @since 0.8.11 */ public static class FilterKey { private final int[] bitOffset; private final int[] wordOffset; private FilterKey(int[] bitOffset, int[] wordOffset) { this.bitOffset = bitOffset; this.wordOffset = wordOffset; } } /** * @param n number of set members * @return approximate false positive rate */ public final double falsePositives(int n) { // (1 - e(-kN/M))^k return java.lang.Math.pow ( (1l - java.lang.Math.exp(0d- ((double)k) * (long)n / filterBits)), k); } public final double falsePositives() { return falsePositives(count); } /***** // DEBUG METHODS public static String keyToString(byte[] key) { StringBuilder sb = new StringBuilder().append(key[0]); for (int i = 1; i < key.length; i++) { sb.append(".").append(Integer.toString(key[i], 16)); } return sb.toString(); } *****/ /** convert 64-bit integer to hex String */ /***** public static String ltoh (long i) { StringBuilder sb = new StringBuilder().append("#") .append(Long.toString(i, 16)); return sb.toString(); } *****/ /** convert 32-bit integer to String */ /***** public static String itoh (int i) { StringBuilder sb = new StringBuilder().append("#") .append(Integer.toString(i, 16)); return sb.toString(); } *****/ /** convert single byte to String */ /***** public static String btoh (byte b) { int i = 0xff & b; return itoh(i); } *****/ }