package net.i2p.router.util;
import net.i2p.I2PAppContext;
import net.i2p.util.ConcurrentHashSet;
import net.i2p.util.Log;
/**
* Double buffered hash set.
* Since DecayingBloomFilter was instantiated 4 times for a total memory usage
* of 8MB, it seemed like we could do a lot better, given these usage stats
* on a class L router:
*
* ./router/java/src/net/i2p/router/tunnel/BuildMessageProcessor.java:
* 32 bytes, peak 10 entries in 1m
* (320 peak entries seen on fast router)
*
* ./router/java/src/net/i2p/router/transport/udp/InboundMessageFragments.java:
* 4 bytes, peak 150 entries in 10s
* (1600 peak entries seen on fast router)
*
* ./router/java/src/net/i2p/router/MessageValidator.java:
* 8 bytes, peak 1K entries in 2m
* (36K peak entries seen on fast router)
*
* ./router/java/src/net/i2p/router/tunnel/BloomFilterIVValidator.java:
* 16 bytes, peak 15K entries in 10m
*
* If the ArrayWrapper object in the HashSet is 50 bytes, and BloomSHA1(23, 11) is 1MB,
* then for less than 20K entries this is smaller.
* And this uses space proportional to traffic, so it doesn't penalize small routers
* with a fixed 8MB.
* So let's try it for the first 2 or 3, for now.
*
* Also, DBF is synchronized, and uses SimpleTimer.
* Here we use a read/write lock, with synchronization only
* when switching double buffers, and we use SimpleTimer2.
*
* Yes, we could stare at stats all day, and try to calculate an acceptable
* false-positive rate for each of the above uses, then estimate the DBF size
* required to meet that rate for a given usage. Or even start adjusting the
* Bloom filter m and k values on a per-DBF basis. But it's a whole lot easier
* to implement something with a zero false positive rate, and uses less memory
* for almost all bandwidth classes.
*
* This has a strictly zero false positive rate for <= 8 byte keys.
* For larger keys, it is 1 / (2**64) ~= 5E-20, which is better than
* DBF for any entry count greater than about 14K.
*
* DBF has a zero false negative rate over the period
* 2 * durationMs. And a 100% false negative rate beyond that period.
* This has the same properties.
*
* This performs about twice as fast as DBF in the test below.
*
* @author zzz
*/
public class DecayingHashSet extends DecayingBloomFilter {
private ConcurrentHashSet<ArrayWrapper> _current;
private ConcurrentHashSet<ArrayWrapper> _previous;
/**
* Create a double-buffered hash set that will decay its entries over time.
*
* @param durationMs entries last for at least this long, but no more than twice this long
* @param entryBytes how large are the entries to be added? 1 to 32 bytes
*/
public DecayingHashSet(I2PAppContext context, int durationMs, int entryBytes) {
this(context, durationMs, entryBytes, "DHS");
}
/** @param name just for logging / debugging / stats */
public DecayingHashSet(I2PAppContext context, int durationMs, int entryBytes, String name) {
super(durationMs, entryBytes, name, context);
if (entryBytes <= 0 || entryBytes > 32)
throw new IllegalArgumentException("Bad size");
_current = new ConcurrentHashSet<ArrayWrapper>(128);
_previous = new ConcurrentHashSet<ArrayWrapper>(128);
if (_log.shouldLog(Log.DEBUG))
_log.debug("New DHS " + name + " entryBytes = " + entryBytes +
" cycle (s) = " + (durationMs / 1000));
// try to get a handle on memory usage vs. false positives
context.statManager().createRateStat("router.decayingHashSet." + name + ".size",
"Size", "Router", new long[] { 10 * Math.max(60*1000, durationMs) });
context.statManager().createRateStat("router.decayingHashSet." + name + ".dups",
"1000000 * Duplicates/Size", "Router", new long[] { 10 * Math.max(60*1000, durationMs) });
}
/** unsynchronized but only used for logging elsewhere */
@Override
public int getInsertedCount() {
return _current.size() + _previous.size();
}
/** pointless, only used for logging elsewhere */
@Override
public double getFalsePositiveRate() {
if (_entryBytes <= 8)
return 0d;
return 1d / Math.pow(2d, 64d); // 5.4E-20
}
/**
* @return true if the entry added is a duplicate
*/
@Override
public boolean add(byte entry[], int off, int len) {
if (entry == null)
throw new IllegalArgumentException("Null entry");
if (len != _entryBytes)
throw new IllegalArgumentException("Bad entry [" + len + ", expected "
+ _entryBytes + "]");
ArrayWrapper w = new ArrayWrapper(entry, off, len);
getReadLock();
try {
return locked_add(w, true);
} finally { releaseReadLock(); }
}
/**
* @return true if the entry added is a duplicate. the number of low order
* bits used is determined by the entryBytes parameter used on creation of the
* filter.
*
*/
@Override
public boolean add(long entry) {
return add(entry, true);
}
/**
* @return true if the entry is already known. this does NOT add the
* entry however.
*
*/
@Override
public boolean isKnown(long entry) {
return add(entry, false);
}
private boolean add(long entry, boolean addIfNew) {
ArrayWrapper w = new ArrayWrapper(entry);
getReadLock();
try {
return locked_add(w, addIfNew);
} finally { releaseReadLock(); }
}
/**
* @param addIfNew if true, add the element to current if it is not already there or in previous;
* if false, only check
* @return if the element is in either the current or previous set
*/
private boolean locked_add(ArrayWrapper w, boolean addIfNew) {
boolean seen = _previous.contains(w);
// only access _current once.
if (!seen) {
if (addIfNew)
seen = !_current.add(w);
else
seen = _current.contains(w);
}
if (seen) {
// why increment if addIfNew == false? Only used for stats...
_currentDuplicates++;
}
return seen;
}
@Override
public void clear() {
_current.clear();
_previous.clear();
_currentDuplicates = 0;
}
/** super doesn't call clear, but neither do the users, so it seems like we should here */
@Override
public void stopDecaying() {
_keepDecaying = false;
clear();
}
@Override
protected void decay() {
int currentCount;
long dups;
if (!getWriteLock())
return;
try {
ConcurrentHashSet<ArrayWrapper> tmp = _previous;
currentCount = _current.size();
_previous = _current;
_current = tmp;
_current.clear();
dups = _currentDuplicates;
_currentDuplicates = 0;
} finally { releaseWriteLock(); }
if (_log.shouldLog(Log.DEBUG))
_log.debug("Decaying the filter " + _name + " after inserting " + currentCount
+ " elements and " + dups + " false positives");
_context.statManager().addRateData("router.decayingHashSet." + _name + ".size",
currentCount);
if (currentCount > 0)
_context.statManager().addRateData("router.decayingHashSet." + _name + ".dups",
1000l*1000*dups/currentCount);
}
/**
* This saves the data as-is if the length is <= 8 bytes,
* otherwise it stores an 8-byte hash.
* Hash function is from DataHelper, modded to get
* the maximum entropy given the length of the data.
*/
private static class ArrayWrapper {
private final long _longhashcode;
public ArrayWrapper(byte[] b, int offset, int len) {
int idx = offset;
int shift = Math.min(8, 64 / len);
long lhc = 0;
for (int i = 0; i < len; i++) {
// xor better than + in tests
lhc ^= (((long) b[idx++]) << (i * shift));
}
_longhashcode = lhc;
}
/** faster version for when storing <= 8 bytes */
public ArrayWrapper(long b) {
_longhashcode = b;
}
public int hashCode() {
return (int) _longhashcode;
}
public long longHashCode() {
return _longhashcode;
}
public boolean equals(Object o) {
if (o == null || !(o instanceof ArrayWrapper))
return false;
return ((ArrayWrapper) o).longHashCode() == _longhashcode;
}
}
/**
* vs. DBF, this measures 1.93x faster for testByLong and 2.46x faster for testByBytes.
*/
/*****
public static void main(String args[]) {
// KBytes per sec, 1 message per KByte
int kbps = 256;
int iterations = 10;
//testSize();
testByLong(kbps, iterations);
testByBytes(kbps, iterations);
}
*****/
/** and the answer is: 49.9 bytes. The ArrayWrapper alone measured 16, so that's 34 for the HashSet entry. */
/*****
private static void testSize() {
int qty = 256*1024;
byte b[] = new byte[8];
Random r = new Random();
long old = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
ConcurrentHashSet foo = new ConcurrentHashSet(qty);
for (int i = 0; i < qty; i++) {
r.nextBytes(b);
foo.add(new ArrayWrapper(b, 0, 8));
}
long used = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
System.out.println("Memory per ArrayWrapper: " + (((double) (used - old)) / qty));
}
*****/
/** 8 bytes, simulate the router message validator */
/*****
private static void testByLong(int kbps, int numRuns) {
int messages = 60 * 10 * kbps;
Random r = new Random();
DecayingBloomFilter filter = new DecayingHashSet(I2PAppContext.getGlobalContext(), 600*1000, 8);
int falsePositives = 0;
long totalTime = 0;
for (int j = 0; j < numRuns; j++) {
long start = System.currentTimeMillis();
for (int i = 0; i < messages; i++) {
if (filter.add(r.nextLong())) {
falsePositives++;
System.out.println("False positive " + falsePositives + " (testByLong j=" + j + " i=" + i + ")");
}
}
totalTime += System.currentTimeMillis() - start;
filter.clear();
}
System.out.println("False postive rate should be " + filter.getFalsePositiveRate());
filter.stopDecaying();
System.out.println("After " + numRuns + " runs pushing " + messages + " entries in "
+ DataHelper.formatDuration(totalTime/numRuns) + " per run, there were "
+ falsePositives + " false positives");
}
*****/
/** 16 bytes, simulate the tunnel IV validator */
/*****
private static void testByBytes(int kbps, int numRuns) {
byte iv[][] = new byte[60*10*kbps][16];
Random r = new Random();
for (int i = 0; i < iv.length; i++)
r.nextBytes(iv[i]);
DecayingBloomFilter filter = new DecayingHashSet(I2PAppContext.getGlobalContext(), 600*1000, 16);
int falsePositives = 0;
long totalTime = 0;
for (int j = 0; j < numRuns; j++) {
long start = System.currentTimeMillis();
for (int i = 0; i < iv.length; i++) {
if (filter.add(iv[i])) {
falsePositives++;
System.out.println("False positive " + falsePositives + " (testByBytes j=" + j + " i=" + i + ")");
}
}
totalTime += System.currentTimeMillis() - start;
filter.clear();
}
System.out.println("False postive rate should be " + filter.getFalsePositiveRate());
filter.stopDecaying();
System.out.println("After " + numRuns + " runs pushing " + iv.length + " entries in "
+ DataHelper.formatDuration(totalTime/numRuns) + " per run, there were "
+ falsePositives + " false positives");
}
*****/
}