package net.i2p.router.util;

import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import net.i2p.I2PAppContext;
import net.i2p.data.DataHelper;
import net.i2p.util.Log;
import net.i2p.util.SimpleTimer2;

import org.xlattice.crypto.filters.BloomSHA1;

/**
 * Series of bloom filters which decay over time, allowing their continual use
 * for time sensitive data.  This has a fixed size (per
 * period, using two periods overall), allowing this to pump through hundreds of
 * entries per second with virtually no false positive rate.  Down the line,
 * this may be refactored to allow tighter control of the size necessary for the
 * contained bloom filters.
 *
 * Two filters are kept: "current" receives new entries, "previous" holds the
 * entries of the prior period. On each decay the two are swapped and the new
 * current one is cleared, so an entry is remembered for at least one and at
 * most two periods.
 *
 * See main() for an analysis of false positive rate.
 * See BloomFilterIVValidator for instantiation parameters.
 * See DecayingHashSet for a smaller and simpler version.
 * @see net.i2p.router.tunnel.BloomFilterIVValidator
 * @see net.i2p.router.util.DecayingHashSet
 */
public class DecayingBloomFilter {
    protected final I2PAppContext _context;
    protected final Log _log;
    /** filter receiving new entries; null when built via the protected constructor */
    private BloomSHA1 _current;
    /** filter of the prior period; null when built via the protected constructor */
    private BloomSHA1 _previous;
    protected final int _durationMs;
    protected final int _entryBytes;
    /** random pads XORed against short entries to extend them to at least 32 bytes */
    private final byte _extenders[][];
    /** scratch buffer for the extended entry, or null if entries need no extension */
    private final byte _extended[];
    /** scratch buffer for the byte form of a long entry, or null if entries need no extension */
    private final byte _longToEntry[];
    private final long _longToEntryMask;
    protected long _currentDuplicates;
    protected volatile boolean _keepDecaying;
    protected final SimpleTimer2.TimedEvent _decayEvent;
    /** just for logging */
    protected final String _name;
    /** synchronize against this lock when switching double buffers */
    protected final ReentrantReadWriteLock _reorganizeLock = new ReentrantReadWriteLock();

    private static final int DEFAULT_M = 23;
    private static final int DEFAULT_K = 11;
    /** true for debugging */
    private static final boolean ALWAYS_MISS = false;
    /** number of leading bytes of the (possibly extended) entry fed to the filter */
    private static final int KEY_BYTES = 32;
    /** common prefix of the rate stats published by this class */
    private static final String STAT_PREFIX = "router.decayingBloomFilter.";

    /**
     * Only for extension by DHS. Does not allocate any bloom filter or
     * scratch buffer; registers the shutdown task and sets up the decay timer only.
     *
     * @param durationMs decay period; exactly one hour selects the hourly decay event
     * @param entryBytes size of the entries
     * @param name just for logging
     */
    protected DecayingBloomFilter(int durationMs, int entryBytes, String name, I2PAppContext context) {
        _context = context;
        _log = context.logManager().getLog(getClass());
        _entryBytes = entryBytes;
        _name = name;
        _durationMs = durationMs;
        // all final
        _extenders = null;
        _extended = null;
        _longToEntry = null;
        _longToEntryMask = 0;
        context.addShutdownTask(new Shutdown());
        _keepDecaying = true;
        if (_durationMs == 60*60*1000) {
            // special mode for BuildMessageProcessor
            _decayEvent = new DecayHourlyEvent();
        } else {
            _decayEvent = new DecayEvent();
            _decayEvent.schedule(_durationMs);
        }
    }

    /**
     * Create a bloom filter that will decay its entries over time.
     * Uses default m of 23 (unless overridden by the
     * router.decayingBloomFilterM property), memory usage is 2 MB.
     *
     * @param durationMs entries last for at least this long, but no more than twice this long
     * @param entryBytes how large are the entries to be added?  if this is less than 32 bytes,
     *                   the entries added will be expanded by concatenating the entry with
     *                   copies of itself XORed against random values chosen at construction.
     */
    public DecayingBloomFilter(I2PAppContext context, int durationMs, int entryBytes) {
        this(context, durationMs, entryBytes, "DBF");
    }

    /**
     * Uses default m of 23 (unless overridden by the
     * router.decayingBloomFilterM property), memory usage is 2 MB.
     *
     * @param name just for logging / debugging / stats
     */
    public DecayingBloomFilter(I2PAppContext context, int durationMs, int entryBytes, String name) {
        // this is instantiated in four different places, they may have different
        // requirements, but for now use this as a gross method of memory reduction.
        // m == 23 => 1MB each BloomSHA1 (4 pairs = 8MB total)
        this(context, durationMs, entryBytes, name, context.getProperty("router.decayingBloomFilterM", DEFAULT_M));
    }

    /**
     * Memory usage is 2 * (2**m) bits or 2**(m-2) bytes.
     *
     * @param durationMs entries last for at least this long, but no more than twice this long
     * @param entryBytes size of the entries, must be positive
     * @param name just for logging / debugging / stats
     * @param m filter size exponent, max is 29
     * @throws IllegalArgumentException if entryBytes is not positive or m is greater than 29
     */
    public DecayingBloomFilter(I2PAppContext context, int durationMs, int entryBytes, String name, int m) {
        // reject early: entryBytes is used as a divisor and as an array dimension below
        if (entryBytes <= 0)
            throw new IllegalArgumentException("Bad entryBytes " + entryBytes);
        _context = context;
        _log = context.logManager().getLog(DecayingBloomFilter.class);
        _entryBytes = entryBytes;
        _name = name;
        int k = DEFAULT_K;
        // max is (23,11) or (26,10) or (29,9); see KeySelector for details
        if (m > DEFAULT_M) {
            k--;
            if (m > 26) {
                k--;
                if (m > 29)
                    throw new IllegalArgumentException("Max m is 29");
            }
        }
        _current = new BloomSHA1(m, k);
        _previous = new BloomSHA1(m, k);
        _durationMs = durationMs;
        // ceil(32 / entryBytes) - 1 additional copies are needed to reach 32 bytes
        int numExtenders = (KEY_BYTES + (entryBytes-1))/entryBytes - 1;
        if (numExtenders < 0)
            numExtenders = 0;
        _extenders = new byte[numExtenders][entryBytes];
        for (int i = 0; i < numExtenders; i++)
            _context.random().nextBytes(_extenders[i]);
        if (numExtenders > 0) {
            // Sized to hold the entry plus every XORed copy. This is exactly 32 when
            // entryBytes divides 32 and slightly more otherwise, so that locked_add()
            // never writes past the end; only the first 32 bytes are used as the key.
            _extended = new byte[entryBytes * (numExtenders + 1)];
            _longToEntry = new byte[_entryBytes];
            _longToEntryMask = (1L << (_entryBytes * 8L)) - 1;
        } else {
            // final
            _extended = null;
            _longToEntry = null;
            _longToEntryMask = 0;
        }
        _keepDecaying = true;
        if (_durationMs == 60*60*1000) {
            // special mode for BuildMessageProcessor
            _decayEvent = new DecayHourlyEvent();
        } else {
            _decayEvent = new DecayEvent();
            _decayEvent.schedule(_durationMs);
        }
        if (_log.shouldLog(Log.WARN))
           _log.warn("New DBF " + name + " m = " + m + " k = " + k + " entryBytes = " + entryBytes +
                     " numExtenders = " + numExtenders + " cycle (s) = " + (durationMs / 1000));
        // try to get a handle on memory usage vs. false positives
        // computed as a long so that very long durations cannot overflow
        long statPeriod = 10L * Math.max(60*1000, durationMs);
        context.statManager().createRateStat(STAT_PREFIX + name + ".size",
             "Size", "Router", new long[] { statPeriod });
        context.statManager().createRateStat(STAT_PREFIX + name + ".dups",
             "1000000 * Duplicates/Size", "Router", new long[] { statPeriod });
        context.statManager().createRateStat(STAT_PREFIX + name + ".log10(falsePos)",
             "log10 of the false positive rate (must have net.i2p.router.util.DecayingBloomFilter=DEBUG)",
             "Router", new long[] { statPeriod });
        context.addShutdownTask(new Shutdown());
    }

    /**
     * Shutdown task, clears the filter.
     * @since 0.8.8
     */
    private class Shutdown implements Runnable {
        public void run() {
            clear();
        }
    }

    /** @return the number of duplicates seen since the last decay or clear */
    public long getCurrentDuplicateCount() { return _currentDuplicates; }

    /** unsynchronized but only used for logging elsewhere */
    public int getInsertedCount() {
        return _current.size() + _previous.size();
    }

    /** unsynchronized, only used for logging elsewhere */
    public double getFalsePositiveRate() {
        return _current.falsePositives();
    }

    /**
     * @param entry non-null, length must equal the entryBytes given at construction
     * @return true if the entry added is a duplicate
     * @throws IllegalArgumentException if entry is null or of the wrong length
     */
    public boolean add(byte entry[]) {
        if (ALWAYS_MISS) return false;
        // check before dereferencing, so that null is reported the same way as in add(byte[], int, int)
        if (entry == null)
            throw new IllegalArgumentException("Null entry");
        return add(entry, 0, entry.length);
    }

    /**
     * @param entry non-null
     * @param off offset of the entry in the array
     * @param len must equal the entryBytes given at construction
     * @return true if the entry added is a duplicate
     * @throws IllegalArgumentException if entry is null or len is wrong
     */
    public boolean add(byte entry[], int off, int len) {
        if (ALWAYS_MISS) return false;
        if (entry == null)
            throw new IllegalArgumentException("Null entry");
        if (len != _entryBytes)
            throw new IllegalArgumentException("Bad entry [" + len + ", expected "
                                               + _entryBytes + "]");
        getReadLock();
        try {
            return locked_add(entry, off, len, true);
        } finally { releaseReadLock(); }
    }

    /**
     * @return true if the entry added is a duplicate.  the number of low order
     * bits used is determined by the entryBytes parameter used on creation of the
     * filter.
     *
     */
    public boolean add(long entry) {
        if (ALWAYS_MISS) return false;
        longToEntry(entry);
        getReadLock();
        try {
            return locked_add(_longToEntry, 0, _longToEntry.length, true);
        } finally { releaseReadLock(); }
    }

    /**
     * @return true if the entry is already known.  this does NOT add the
     * entry however.
     *
     */
    public boolean isKnown(long entry) {
        if (ALWAYS_MISS) return false;
        longToEntry(entry);
        getReadLock();
        try {
            return locked_add(_longToEntry, 0, _longToEntry.length, false);
        } finally { releaseReadLock(); }
    }

    /**
     * Writes the byte form of the given long into the shared _longToEntry buffer.
     * Shared by add(long) and isKnown(long) so that both always map a value to
     * the same bytes.
     *
     * NOTE(review): _longToEntry is a single per-instance buffer and is written
     * here before any lock is taken, so concurrent callers appear able to
     * overwrite each other's value - confirm callers are single-threaded.
     */
    private void longToEntry(long entry) {
        if (_entryBytes <= 7) {
            // Previously written as ((entry ^ mask) & ((1 << 31)-1)) | (entry ^ mask),
            // which by absorption ((A & B) | A == A) is exactly entry ^ mask.
            entry ^= _longToEntryMask;
            //entry &= _longToEntryMask;
        }
        if (entry < 0) {
            // store the magnitude and flag the sign in the top bit of the first byte
            DataHelper.toLong(_longToEntry, 0, _entryBytes, 0-entry);
            _longToEntry[0] |= (1 << 7);
        } else {
            DataHelper.toLong(_longToEntry, 0, _entryBytes, entry);
        }
    }

    /**
     * Looks the entry up in both filters, counts it as a duplicate if found in
     * either, and otherwise optionally inserts it into the current filter.
     * Callers in this class hold the read lock.
     *
     * NOTE(review): _extended is a single per-instance scratch buffer and the
     * read lock is shared, so concurrent callers appear able to interleave
     * writes to it - confirm against the threading of the callers.
     *
     * @param addIfNew if false this is a pure lookup (apart from the duplicate counter)
     * @return true if the entry was found in the current or previous filter
     */
    private boolean locked_add(byte entry[], int offset, int len, boolean addIfNew) {
        final BloomSHA1.FilterKey key;
        if (_extended != null) {
            // extend the entry to 32 bytes: the entry itself, followed by
            // one copy per extender XORed against that extender
            System.arraycopy(entry, offset, _extended, 0, len);
            for (int i = 0; i < _extenders.length; i++)
                DataHelper.xor(entry, offset, _extenders[i], 0, _extended, _entryBytes * (i+1), _entryBytes);
            key = _current.getFilterKey(_extended, 0, KEY_BYTES);
        } else {
            key = _current.getFilterKey(entry, offset, len);
        }

        // previous filter is consulted only if the current one misses
        boolean seen = _current.locked_member(key) || _previous.locked_member(key);
        if (seen)
            _currentDuplicates++;
        else if (addIfNew)
            _current.locked_insert(key);
        _current.release(key);
        return seen;
    }

    /**
     * Empties both filters and resets the duplicate counter.
     * Does nothing if the write lock cannot be acquired.
     */
    public void clear() {
        if (!getWriteLock())
            return;
        try {
            _current.clear();
            _previous.clear();
            _currentDuplicates = 0;
        } finally { releaseWriteLock(); }
    }

    /** Stops the periodic decay and cancels the pending decay event. */
    public void stopDecaying() {
        _keepDecaying = false;
        _decayEvent.cancel();
    }

    /**
     * Swaps the two filters, clears the new current one, resets the duplicate
     * counter, and publishes the size / duplicate / false positive stats for
     * the period that just ended.
     * Does nothing if the write lock cannot be acquired.
     */
    protected void decay() {
        int currentCount = 0;
        long dups = 0;
        double fpr = 0d;
        if (!getWriteLock())
            return;
        try {
            BloomSHA1 tmp = _previous;
            currentCount = _current.size();
            // only computed when debug logging is enabled
            if (_log.shouldLog(Log.DEBUG) && currentCount > 0)
                fpr = _current.falsePositives();
            _previous = _current;
            _current = tmp;
            _current.clear();
            dups = _currentDuplicates;
            _currentDuplicates = 0;
        } finally { releaseWriteLock(); }
        if (_log.shouldLog(Log.DEBUG))
            _log.debug("Decaying the filter " + _name + " after inserting " + currentCount
                       + " elements and " + dups + " false positives with FPR = " + fpr);
        _context.statManager().addRateData(STAT_PREFIX + _name + ".size",
                                           currentCount);
        if (currentCount > 0)
            _context.statManager().addRateData(STAT_PREFIX + _name + ".dups",
                                               1000L*1000*dups/currentCount);
        if (fpr > 0d) {
            // only if log.shouldLog(Log.DEBUG) ...
            long exponent = (long) Math.log10(fpr);
            _context.statManager().addRateData(STAT_PREFIX + _name + ".log10(falsePos)",
                                               exponent);
        }
    }

    /** Decays every _durationMs and reschedules itself until decaying is stopped. */
    private class DecayEvent extends SimpleTimer2.TimedEvent {
        /**
         *  Caller MUST schedule.
         */
        DecayEvent() {
            super(_context.simpleTimer2());
        }

        public void timeReached() {
            if (_keepDecaying) {
                decay();
                schedule(_durationMs);
            }
        }
    }

    /**
     *  Decays at 5 minutes after the top of the hour.
     *  This ignores leap seconds.
     *  @since 0.9.24
     */
    private class DecayHourlyEvent extends SimpleTimer2.TimedEvent {
        private static final long HOUR = 60 * 60 * 1000L;
        private static final long LAG = 5 * 60 * 1000L;
        /** the hour (epoch time / HOUR) of the last scheduling or decay */
        private volatile long _currentHour;

        /**
         *  Schedules itself. Caller MUST NOT schedule.
         */
        DecayHourlyEvent() {
            super(_context.simpleTimer2());
            schedule(getTimeTillNextHour());
        }

        public void timeReached() {
            if (_keepDecaying) {
                long now = _context.clock().now();
                long currentHour = now / HOUR;
                // handle possible clock adjustments:
                // decay only if the hour actually changed
                if (_currentHour != currentHour) {
                    decay();
                    _currentHour = currentHour;
                }
                long next = ((1 + currentHour) * HOUR) + LAG;
                // never reschedule sooner than 5 seconds from now
                schedule(Math.max(5000, next - now));
            }
        }

        /** side effect: sets _currentHour */
        private long getTimeTillNextHour() {
            long now = _context.clock().now();
            long currentHour = now / HOUR;
            _currentHour = currentHour;
            long next = ((1 + currentHour) * HOUR) + LAG;
            return Math.max(5000, next - now);
        }
    }

    /** @since 0.8.11 moved from DecayingHashSet */
    protected void getReadLock() {
        _reorganizeLock.readLock().lock();
    }

    /** @since 0.8.11 moved from DecayingHashSet */
    protected void releaseReadLock() {
        _reorganizeLock.readLock().unlock();
    }

    /**
     *  Waits up to 5 seconds for the write lock. If the wait is interrupted,
     *  the interrupt status of the thread is restored and false is returned.
     *
     *  @return true if the lock was acquired
     *  @since 0.8.11 moved from DecayingHashSet
     */
    protected boolean getWriteLock() {
        try {
            boolean rv = _reorganizeLock.writeLock().tryLock(5000, TimeUnit.MILLISECONDS);
            if (!rv)
                _log.error("no lock, size is: " + _reorganizeLock.getQueueLength(), new Exception("rats"));
            return rv;
        } catch (InterruptedException ie) {
            // do not swallow the interruption; let code further up the stack see it
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /** @since 0.8.11 moved from DecayingHashSet */
    protected void releaseWriteLock() {
        _reorganizeLock.writeLock().unlock();
    }

    /**
     *  This filter is used only for participants and OBEPs, not
     *  IBGWs, so depending on your assumptions of avg. tunnel length,
     *  the performance is somewhat better than the gross share BW
     *  would indicate.
     *
     *<pre>
     *  Following stats for m=23, k=11:
     *  Theoretical false positive rate for   16 KBps: 1.17E-21
     *  Theoretical false positive rate for   24 KBps: 9.81E-20
     *  Theoretical false positive rate for   32 KBps: 2.24E-18
     *  Theoretical false positive rate for  256 KBps: 7.45E-9
     *  Theoretical false positive rate for  512 KBps: 5.32E-6
     *  Theoretical false positive rate for 1024 KBps: 1.48E-3
     *  Then it gets bad: 1280 .67%; 1536 2.0%; 1792 4.4%; 2048 8.2%.
     *
     *  Following stats for m=24, k=10:
     *  1280 4.5E-5; 1792 5.6E-4; 2048 0.14%
     *
     *  Following stats for m=25, k=10:
     *  1792 2.4E-6; 4096 0.14%; 5120 0.6%; 6144 1.7%; 8192 6.8%; 10240 15%
     *
     *  Following stats for m=26, k=10:
     *  4096 7.3E-6; 5120 4.5E-5; 6144 1.8E-4; 8192 0.14%; 10240 0.6%, 12288 1.7%
     *
     *  Following stats for m=27, k=9:
     *  8192 1.1E-5; 10240 5.6E-5; 12288 2.0E-4; 14336 5.8E-4; 16384 0.14%
     *</pre>
     */
/*****
    public static void main(String args[]) {
        System.out.println("Usage: DecayingBloomFilter [kbps [m [iterations]]] (default 256 23 10)");
        int kbps = 256;
        if (args.length >= 1) {
            try {
                kbps = Integer.parseInt(args[0]);
            } catch (NumberFormatException nfe) {}
        }
        int m = DEFAULT_M;
        if (args.length >= 2) {
            try {
                m = Integer.parseInt(args[1]);
            } catch (NumberFormatException nfe) {}
        }
        int iterations = 10;
        if (args.length >= 3) {
            try {
                iterations = Integer.parseInt(args[2]);
            } catch (NumberFormatException nfe) {}
        }
        testByLong(kbps, m, iterations);
        testByBytes(kbps, m, iterations);
    }

    private static void testByLong(int kbps, int m, int numRuns) {
        System.out.println("Starting 8 byte test");
        int messages = 60 * 10 * kbps;
        java.util.Random r = new java.util.Random();
        DecayingBloomFilter filter = new DecayingBloomFilter(I2PAppContext.getGlobalContext(), 600*1000, 8, "test", m);
        int falsePositives = 0;
        long totalTime = 0;
        double fpr = 0d;
        for (int j = 0; j < numRuns; j++) {
            // screen out birthday paradoxes (waste of time and space?)
            java.util.Set<Long> longs = new java.util.HashSet<Long>(messages);
            long start = System.currentTimeMillis();
            for (int i = 0; i < messages; i++) {
                long rand;
                do {
                    rand = r.nextLong();
                } while (!longs.add(Long.valueOf(rand)));
                if (filter.add(rand)) {
                    falsePositives++;
                    //System.out.println("False positive " + falsePositives + " (testByLong j=" + j + " i=" + i + ")");
                }
            }
            totalTime += System.currentTimeMillis() - start;
            fpr = filter.getFalsePositiveRate();
            filter.clear();
        }
        filter.stopDecaying();
        System.out.println("False postive rate should be " + fpr);
        System.out.println("After " + numRuns + " runs pushing " + messages + " entries in "
                           + DataHelper.formatDuration(totalTime/numRuns) + " per run, there were "
                           + falsePositives + " false positives (" +
                           (((double) falsePositives) / messages) + ')');

    }

    private static void testByBytes(int kbps, int m, int numRuns) {
        System.out.println("Starting 16 byte test");
        byte iv[][] = new byte[60*10*kbps][16];
        java.util.Random r = new java.util.Random();
        for (int i = 0; i < iv.length; i++)
            r.nextBytes(iv[i]);

        DecayingBloomFilter filter = new DecayingBloomFilter(I2PAppContext.getGlobalContext(), 600*1000, 16, "test", m);
        int falsePositives = 0;
        long totalTime = 0;
        double fpr = 0d;
        for (int j = 0; j < numRuns; j++) {
            long start = System.currentTimeMillis();
            for (int i = 0; i < iv.length; i++) {
                if (filter.add(iv[i])) {
                    falsePositives++;
                    //System.out.println("False positive " + falsePositives + " (testByBytes j=" + j + " i=" + i + ")");
                }
            }
            totalTime += System.currentTimeMillis() - start;
            fpr = filter.getFalsePositiveRate();
            filter.clear();
        }
        filter.stopDecaying();
        System.out.println("False postive rate should be " + fpr);
        System.out.println("After " + numRuns + " runs pushing " + iv.length + " entries in "
                           + DataHelper.formatDuration(totalTime/numRuns) + " per run, there were "
                           + falsePositives + " false positives (" +
                           (((double) falsePositives) / iv.length) + ')');
        //System.out.println("inserted: " + bloom.size() + " with " + bloom.capacity()
        //                   + " (" + bloom.falsePositives()*100.0d + "% false positive)");
    }
*****/
}