package org.archive.wayback.resourceindex.filters;
import java.util.LinkedHashMap;
import java.util.Map;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.util.ObjectFilter;
/**
 * Filter that caps how many captures sharing the same digest are included.
 *
 * <p>The first {@code minThreshold} captures passed to
 * {@link #filterObject(CaptureSearchResult)} are always included and are not
 * tracked. After that, digests are counted in a small bounded cache: at most
 * {@code maxTrackedHashes} digests are remembered at once, and the one that
 * was touched longest ago is dropped when the cache overflows. While a digest
 * stays in the cache, it is included until its count reaches
 * {@code maxDupeHashes}; further captures with that digest are excluded. A
 * digest that has been evicted from the cache starts counting from scratch.
 *
 * <p>Instances keep mutable per-instance state (a counter and the cache) and
 * perform no synchronization.
 */
public class DuplicateHashFilter implements ObjectFilter<CaptureSearchResult> {

    /** Count at which a tracked digest stops being included. */
    private int maxDupeHashes = 10;

    /** Maximum number of distinct digests held in {@link #cache} at once. */
    protected int maxTrackedHashes = 3;

    /**
     * Number of captures seen during the warm-up phase. It is only advanced
     * while below {@link #minThreshold}, so it can never overflow.
     */
    private int numCaptures = 0;

    /** Number of leading captures that are always included and not tracked. */
    private int minThreshold = 100;

    /**
     * Insertion-ordered digest-to-count map that discards its eldest entry
     * once it holds more than {@code maxTrackedHashes} entries. Callers get
     * least-recently-used behaviour by removing and re-inserting an entry
     * each time it is touched.
     */
    public class LRUHashCache extends LinkedHashMap<String, Integer> {
        private static final long serialVersionUID = 1L;

        /**
         * Tells {@link LinkedHashMap} whether to evict the eldest entry after
         * an insertion.
         *
         * @param eldest the entry that would be evicted (unused)
         * @return {@code true} when the map holds more than
         *         {@code maxTrackedHashes} entries
         */
        @Override
        public boolean removeEldestEntry(Map.Entry<String, Integer> eldest) {
            return size() > maxTrackedHashes;
        }
    }

    /** Counts of the most recently touched digests. */
    LRUHashCache cache = new LRUHashCache();

    /**
     * Decides whether a capture should be kept, based on how often its digest
     * has recently been seen.
     *
     * @param o the capture to test; its digest is read via {@code getDigest()}
     * @return {@code FILTER_INCLUDE} during the warm-up phase and for digests
     *         whose tracked count is still below {@code maxDupeHashes};
     *         {@code FILTER_EXCLUDE} once the tracked count has reached
     *         {@code maxDupeHashes}
     */
    @Override
    public int filterObject(CaptureSearchResult o) {
        String thisHash = o.getDigest();

        // Warm-up: the first minThreshold captures always pass and are not
        // tracked. The counter is only advanced here, so it saturates at
        // minThreshold instead of eventually wrapping around to a negative
        // value, which would silently switch the filter back off.
        if (numCaptures < minThreshold) {
            numCaptures++;
            return FILTER_INCLUDE;
        }

        // Remove first and re-insert below: in an insertion-ordered map this
        // moves the digest to the "youngest" position.
        Integer seen = cache.remove(thisHash);

        int result = FILTER_INCLUDE;
        int updated;
        if (seen == null) {
            updated = 1;
        } else if (seen >= maxDupeHashes) {
            // Limit reached: exclude, but keep the entry fresh in the cache
            // without growing its count any further.
            result = FILTER_EXCLUDE;
            updated = seen;
        } else {
            updated = seen + 1;
        }
        cache.put(thisHash, updated);

        return result;
    }
}