package be.bagofwords.db.bloomfilter;
import be.bagofwords.application.BowTaskScheduler;
import be.bagofwords.db.DBUtils;
import be.bagofwords.db.DataInterface;
import be.bagofwords.db.LayeredDataInterface;
import be.bagofwords.iterator.CloseableIterator;
import be.bagofwords.ui.UI;
import be.bagofwords.util.KeyValue;
import java.util.Iterator;
import java.util.concurrent.locks.ReentrantLock;
public class BloomFilterDataInterface<T extends Object> extends LayeredDataInterface<T> {
private static final double INITIAL_FPP = 0.001;
private final static double MAX_FPP = INITIAL_FPP * 20;
private final DataInterface<LongBloomFilterWithCheckSum> bloomFilterDataInterface;
private final ReentrantLock modifyBloomFilterLock;
private LongBloomFilterWithCheckSum bloomFilter;
private long currentKeyForNewBloomFilterCreation = Long.MAX_VALUE;
private long actualWriteCount;
private long writeCountOfSavedFilter;
public BloomFilterDataInterface(DataInterface<T> baseInterface, DataInterface<LongBloomFilterWithCheckSum> bloomFilterDataInterface, BowTaskScheduler taskScheduler) {
super(baseInterface);
this.bloomFilterDataInterface = bloomFilterDataInterface;
this.modifyBloomFilterLock = new ReentrantLock();
this.bloomFilter = bloomFilterDataInterface.read(getName());
if (this.bloomFilter != null) {
actualWriteCount = writeCountOfSavedFilter = this.bloomFilter.getDataCheckSum();
} else {
writeCountOfSavedFilter = -Long.MAX_VALUE;
actualWriteCount = writeCountOfSavedFilter + 1;
}
taskScheduler.schedulePeriodicTask(() -> ifNotClosed(this::writeBloomFilterToDiskIfNecessary), 1000);
}
@Override
public void optimizeForReading() {
baseInterface.optimizeForReading();
if (!validBloomFilter(this.bloomFilter)) {
createNewBloomFilter();
}
}
@Override
public T read(long key) {
LongBloomFilterWithCheckSum currentBloomFilter = bloomFilter;
boolean validFilter = validBloomFilter(currentBloomFilter);
if (!validFilter && modifyBloomFilterLock.tryLock()) {
createNewBloomFilter();
currentBloomFilter = bloomFilter;
modifyBloomFilterLock.unlock();
}
if (!validFilter || currentKeyForNewBloomFilterCreation < key) {
//we are still creating the bloom filter
return baseInterface.read(key);
} else {
if (currentBloomFilter.mightContain(key)) {
return baseInterface.read(key);
} else {
return null;
}
}
}
private boolean validBloomFilter(LongBloomFilterWithCheckSum bloomFilter) {
return bloomFilter != null && actualWriteCount == bloomFilter.getDataCheckSum();
}
@Override
public void write(long key, T value) {
tryToUpdateFilter(key);
baseInterface.write(key, value);
}
private void tryToUpdateFilter(long key) {
LongBloomFilterWithCheckSum currFilter = bloomFilter;
if (currFilter != null) {
//try to keep filter up-to-date
currFilter.put(key);
currFilter.increaseDataCheckSum();
}
if (currFilter != null && currFilter.expectedFpp() > MAX_FPP) {
modifyBloomFilterLock.lock();
if (bloomFilter != null && bloomFilter.expectedFpp() > MAX_FPP) {
bloomFilter = null;
}
modifyBloomFilterLock.unlock();
}
actualWriteCount++;
}
@Override
public void write(final Iterator<KeyValue<T>> keyValueIterator) {
baseInterface.write(new Iterator<KeyValue<T>>() {
@Override
public boolean hasNext() {
return keyValueIterator.hasNext();
}
@Override
public KeyValue<T> next() {
KeyValue<T> next = keyValueIterator.next();
tryToUpdateFilter(next.getKey());
return next;
}
@Override
public void remove() {
keyValueIterator.remove();
}
});
}
@Override
public void dropAllData() {
modifyBloomFilterLock.lock();
try {
baseInterface.dropAllData();
actualWriteCount = 0;
createNewBloomFilterNonSynchronized();
writeBloomFilterToDiskIfNecessary();
} finally {
modifyBloomFilterLock.unlock();
}
}
@Override
public boolean mightContain(long key) {
LongBloomFilterWithCheckSum currentBloomFilter = bloomFilter;
boolean validFilter = validBloomFilter(currentBloomFilter);
if (!validFilter && modifyBloomFilterLock.tryLock()) {
createNewBloomFilter();
currentBloomFilter = bloomFilter;
modifyBloomFilterLock.unlock();
}
if (!validFilter || currentKeyForNewBloomFilterCreation < key) {
//we are still creating the bloom filter
return baseInterface.read(key) != null;
} else {
return currentBloomFilter.mightContain(key);
}
}
private void createNewBloomFilterNonSynchronized() {
currentKeyForNewBloomFilterCreation = Long.MIN_VALUE;
long numOfValuesForBloomFilter = baseInterface.apprSize();
bloomFilter = new LongBloomFilterWithCheckSum(numOfValuesForBloomFilter, INITIAL_FPP);
bloomFilter.setDataCheckSum(actualWriteCount);
baseInterface.flush();
long start = System.currentTimeMillis();
int numOfKeys = 0;
CloseableIterator<Long> it = baseInterface.keyIterator();
while (it.hasNext()) {
long key = it.next();
bloomFilter.put(key);
numOfKeys++;
currentKeyForNewBloomFilterCreation = key;
if (numOfKeys > 100 && numOfKeys > numOfValuesForBloomFilter * 10) {
throw new RuntimeException("Received " + numOfKeys + " while we only expected " + numOfValuesForBloomFilter);
}
}
it.close();
currentKeyForNewBloomFilterCreation = Long.MAX_VALUE;
long taken = (System.currentTimeMillis() - start);
UI.write("Created bloomfilter " + getName() + " in " + taken + " ms for " + numOfKeys + " keys, size is " + bloomFilter.getBits().size() / (8 * 1024) + " kbytes.");
}
private void createNewBloomFilter() {
modifyBloomFilterLock.lock();
createNewBloomFilterNonSynchronized();
modifyBloomFilterLock.unlock();
}
@Override
public synchronized void flush() {
baseInterface.flush();
writeBloomFilterToDiskIfNecessary();
}
private void writeBloomFilterToDiskIfNecessary() {
modifyBloomFilterLock.lock();
boolean needsToBeWritten;
if (bloomFilter == null) {
needsToBeWritten = writeCountOfSavedFilter != -Long.MAX_VALUE;
} else {
needsToBeWritten = writeCountOfSavedFilter != bloomFilter.getDataCheckSum() && bloomFilter.getDataCheckSum() == actualWriteCount;
}
if (needsToBeWritten) {
long start = System.currentTimeMillis();
bloomFilterDataInterface.write(getName(), bloomFilter);
bloomFilterDataInterface.flush();
if (bloomFilter == null) {
writeCountOfSavedFilter = -Long.MAX_VALUE;
} else {
writeCountOfSavedFilter = bloomFilter.getDataCheckSum();
}
if (DBUtils.DEBUG) {
UI.write("Written bloom filter to disk, " + actualWriteCount + " " + (bloomFilter != null ? bloomFilter.getDataCheckSum() : -Long.MAX_VALUE) + " " + writeCountOfSavedFilter + " took " + (System.currentTimeMillis() - start));
}
}
modifyBloomFilterLock.unlock();
}
@Override
protected void doCloseImpl() {
writeBloomFilterToDiskIfNecessary();
bloomFilter = null;
}
}