package be.bagofwords.db.data;
import be.bagofwords.db.bloomfilter.LongBloomFilter;
import com.fasterxml.jackson.annotation.JsonIgnore;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Index of the keys that were added to it, stored in one of two forms.
 *
 * <ul>
 *   <li><b>Sparse</b> ({@code filterCounts == null}): keys are kept exactly in the
 *       {@code cachedKeys} list.</li>
 *   <li><b>Non-sparse</b> ({@code filterCounts != null}): keys are kept in a
 *       {@link LongBloomFilter}, so {@link #mightContain(long)} delegates to the filter.</li>
 * </ul>
 *
 * <p>Only {@link #compact()} is synchronized; the other mutators are not.
 */
public class CompactIndex {

    /** Second constructor argument for every {@link LongBloomFilter} created by this class. */
    public static final double FPP = 0.01;

    /** Incremented on every {@link #addKey(long)}; summed when two indexes are merged. */
    private int numberOfCounts;
    /**
     * First constructor argument for every {@link LongBloomFilter} created by this class; one
     * tenth of it is the sparse-size limit used by {@link #mergeWith(CompactIndex)}.
     */
    private long numberOfFeatures;
    /** Exact keys while sparse; {@code null} after {@link #makeNonSparse()}. */
    private List<Long> cachedKeys;
    /** Bloom filter holding the keys while non-sparse; {@code null} while sparse. */
    private LongBloomFilter filterCounts;
    /** True when {@code cachedKeys} is known to be sorted and free of duplicates. */
    private boolean wasCompacted = false;

    /**
     * Creates an empty sparse index.
     *
     * @param numberOfFeatures value later passed to the Bloom filter constructor
     * @param numberOfCounts   initial value of the add counter
     */
    public CompactIndex(long numberOfFeatures, int numberOfCounts) {
        this.cachedKeys = new ArrayList<>();
        this.numberOfFeatures = numberOfFeatures;
        this.numberOfCounts = numberOfCounts;
    }

    /**
     * Creates a non-sparse index backed by the given filter. {@code cachedKeys} stays
     * {@code null} in this mode.
     *
     * @param numberOfFeatures value stored for later filter construction and merge decisions
     * @param filterCounts     filter that holds the keys
     * @param numberOfCounts   initial value of the add counter
     */
    public CompactIndex(long numberOfFeatures, LongBloomFilter filterCounts, int numberOfCounts) {
        this.numberOfFeatures = numberOfFeatures;
        this.filterCounts = filterCounts;
        this.numberOfCounts = numberOfCounts;
    }

    /**
     * No-argument constructor (presumably for JSON deserialization, given the
     * {@code @JsonIgnore} usage in this class). Starts as an empty sparse index so that
     * {@link #addKey(long)} and {@link #mightContain(long)} work before any setter is called.
     */
    public CompactIndex() {
        this.cachedKeys = new ArrayList<>();
    }

    /** @return {@code true} when no Bloom filter is present and keys live in the list */
    @JsonIgnore
    public boolean isSparse() {
        return filterCounts == null;
    }

    /**
     * Records a key and increments the add counter.
     *
     * @param key key to add
     */
    public void addKey(long key) {
        numberOfCounts++;
        if (isSparse()) {
            cachedKeys.add(key);
            // The appended key may break ordering or introduce a duplicate.
            wasCompacted = false;
        } else {
            filterCounts.put(key);
        }
    }

    public long getNumberOfFeatures() {
        return numberOfFeatures;
    }

    public void setNumberOfFeatures(long numberOfFeatures) {
        this.numberOfFeatures = numberOfFeatures;
    }

    /** @return the live key list (not a copy); {@code null} once the index is non-sparse */
    public List<Long> getCachedKeys() {
        return cachedKeys;
    }

    /**
     * Replaces the key list. The list is stored as-is (no copy) and the index is marked as
     * not compacted, because nothing is known about the order or uniqueness of the new list.
     *
     * @param cachedKeys new key list
     */
    public void setCachedKeys(List<Long> cachedKeys) {
        this.cachedKeys = cachedKeys;
        this.wasCompacted = false;
    }

    public LongBloomFilter getFilterCounts() {
        return filterCounts;
    }

    public void setFilterCounts(LongBloomFilter filterCounts) {
        this.filterCounts = filterCounts;
    }

    public int getNumberOfCounts() {
        return numberOfCounts;
    }

    public void setNumberOfCounts(int numberOfCounts) {
        this.numberOfCounts = numberOfCounts;
    }

    /**
     * Tests whether a key may have been added.
     *
     * @param key key to look up
     * @return while sparse, whether the key list contains the key; otherwise the answer of
     *         the Bloom filter's {@code mightContain}
     */
    public boolean mightContain(long key) {
        if (isSparse()) {
            return cachedKeys.contains(key);
        }
        return filterCounts.mightContain(key);
    }

    /**
     * Moves all cached keys into a new Bloom filter and drops the key list.
     * Does nothing when the index is already non-sparse, so the existing filter is kept.
     */
    public void makeNonSparse() {
        if (!isSparse()) {
            // Without this guard the populated filter would be replaced by an empty one
            // and the loop below would fail on the null key list.
            return;
        }
        LongBloomFilter newFilter = new LongBloomFilter(numberOfFeatures, FPP);
        for (Long key : cachedKeys) {
            newFilter.put(key);
        }
        this.filterCounts = newFilter;
        this.cachedKeys = null;
    }

    /**
     * Builds a new index holding the keys of this index and {@code second}. Neither input
     * is modified. The result uses this index's {@code numberOfFeatures}, and its add
     * counter is exactly the sum of both inputs' counters.
     *
     * <p>The result is sparse only when both inputs are sparse and their combined key count
     * is below one tenth of {@code numberOfFeatures}; otherwise it is Bloom-filter backed.
     *
     * @param second index to merge with this one
     * @return the merged, compacted index
     */
    public CompactIndex mergeWith(CompactIndex second) {
        long maxSizeForSparse = this.getNumberOfFeatures() / 10;
        int mergedNumberOfCounts = this.getNumberOfCounts() + second.getNumberOfCounts();
        boolean makeSparse = this.isSparse() && second.isSparse()
                && this.getCachedKeys().size() + second.getCachedKeys().size() < maxSizeForSparse;

        CompactIndex result;
        if (makeSparse) {
            result = new CompactIndex(this.getNumberOfFeatures(), mergedNumberOfCounts);
            // Copy the keys directly instead of calling addKey(): addKey() would increment
            // the counter again although the sum was already passed to the constructor.
            List<Long> mergedKeys = new ArrayList<>(this.getCachedKeys());
            mergedKeys.addAll(second.getCachedKeys());
            result.setCachedKeys(mergedKeys);
        } else {
            LongBloomFilter mergedCounts;
            if (!this.isSparse() && !second.isSparse()) {
                mergedCounts = ApproximateCountsUtils.mergeBloomFilters(this.getFilterCounts(), second.getFilterCounts());
            } else if (!this.isSparse()) {
                mergedCounts = this.getFilterCounts().clone();
            } else if (!second.isSparse()) {
                mergedCounts = second.getFilterCounts().clone();
            } else {
                // Both sparse, but together too large to stay sparse.
                mergedCounts = new LongBloomFilter(this.getNumberOfFeatures(), CompactIndex.FPP);
            }
            // Whichever side is still sparse contributes its exact keys to the filter.
            if (this.isSparse()) {
                for (Long key : this.getCachedKeys()) {
                    mergedCounts.put(key);
                }
            }
            if (second.isSparse()) {
                for (Long key : second.getCachedKeys()) {
                    mergedCounts.put(key);
                }
            }
            result = new CompactIndex(this.getNumberOfFeatures(), mergedCounts, mergedNumberOfCounts);
        }
        result.compact();
        return result;
    }

    /**
     * Sorts the cached keys and removes duplicates. Does nothing beyond setting the flag
     * when the index is non-sparse, and nothing at all when it is already compacted.
     */
    public synchronized void compact() {
        if (wasCompacted) {
            return;
        }
        if (isSparse() && cachedKeys != null) {
            Collections.sort(cachedKeys);
            List<Long> uniqueKeys = new ArrayList<>();
            for (Long key : cachedKeys) {
                // Compare against the last kept key rather than a sentinel value, so that
                // every possible long (including Long.MAX_VALUE) is kept.
                if (uniqueKeys.isEmpty() || !key.equals(uniqueKeys.get(uniqueKeys.size() - 1))) {
                    uniqueKeys.add(key);
                }
            }
            cachedKeys = uniqueKeys;
        }
        wasCompacted = true;
    }
}