package be.bagofwords.db.data;
import be.bagofwords.util.Pair;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
 * Index of counts per {@code long} key with two interchangeable representations:
 * <ul>
 *   <li><b>sparse</b> - counts are held in a {@link CountsList} ({@code cachedKeys});</li>
 *   <li><b>dense</b> - counts are held in a {@link LongCountsBloomFilter} ({@code filterCounts}).</li>
 * </ul>
 * An instance is considered sparse exactly when {@code filterCounts} is {@code null}
 * (see {@link #isSparse()}).
 *
 * <p>NOTE(review): an instance created through the no-argument constructor has neither
 * representation set; {@link #addCount(long, int)} and {@link #getCount(long)} will fail with a
 * {@link NullPointerException} until {@link #setCachedKeys(CountsList)} or
 * {@link #setFilterCounts(LongCountsBloomFilter)} has been called.
 */
public class CompactCountsIndex {

    /**
     * Second constructor argument used when a new {@link LongCountsBloomFilter} is created during
     * a merge. Presumably the false-positive probability of the filter - confirm against
     * {@code LongCountsBloomFilter}.
     */
    public static final double FPP = 0.01;

    private int numberOfCounts;
    private long maxCounts;
    private CountsList cachedKeys;
    private LongCountsBloomFilter filterCounts;

    /**
     * Creates a sparse index with an empty key list.
     *
     * @param maxCounts      value stored as {@code maxCounts}; also drives the sparse threshold
     * @param numberOfCounts initial value of the running total
     */
    public CompactCountsIndex(long maxCounts, int numberOfCounts) {
        this.maxCounts = maxCounts;
        this.numberOfCounts = numberOfCounts;
        this.cachedKeys = new CountsList();
    }

    /**
     * Creates a dense index backed by the given filter. {@code cachedKeys} is left {@code null}.
     *
     * @param maxCounts      value stored as {@code maxCounts}
     * @param filterCounts   filter holding the counts
     * @param numberOfCounts initial value of the running total
     */
    public CompactCountsIndex(long maxCounts, LongCountsBloomFilter filterCounts, int numberOfCounts) {
        this.maxCounts = maxCounts;
        this.numberOfCounts = numberOfCounts;
        this.filterCounts = filterCounts;
    }

    /**
     * Creates an index with all fields at their defaults; state is expected to be supplied
     * through the setters (presumably by a deserializer).
     */
    public CompactCountsIndex() {
    }

    /**
     * @return {@code true} when no filter is set, i.e. counts live in {@code cachedKeys}
     */
    @JsonIgnore
    public boolean isSparse() {
        return filterCounts == null;
    }

    /**
     * Adds a single occurrence of {@code key}.
     *
     * @param key key to count
     */
    public void addCount(long key) {
        addCount(key, 1);
    }

    /**
     * Adds {@code count} occurrences of {@code key} to the active representation and to the
     * running total.
     *
     * @param key   key to count
     * @param count amount to add
     */
    public void addCount(long key, int count) {
        if (isSparse()) {
            cachedKeys.addCount(key, count);
        } else {
            filterCounts.addCount(key, count);
        }
        numberOfCounts += count;
    }

    // Plain accessors; presumably kept public so the object can be (de)serialized as a bean.

    public long getMaxCounts() {
        return maxCounts;
    }

    public void setMaxCounts(long maxCounts) {
        this.maxCounts = maxCounts;
    }

    public CountsList getCachedKeys() {
        return cachedKeys;
    }

    public void setCachedKeys(CountsList cachedKeys) {
        this.cachedKeys = cachedKeys;
    }

    public LongCountsBloomFilter getFilterCounts() {
        return filterCounts;
    }

    public void setFilterCounts(LongCountsBloomFilter filterCounts) {
        this.filterCounts = filterCounts;
    }

    public int getNumberOfCounts() {
        return numberOfCounts;
    }

    public void setNumberOfCounts(int numberOfCounts) {
        this.numberOfCounts = numberOfCounts;
    }

    /**
     * Looks up the count stored for {@code key}.
     *
     * @param key key to look up
     * @return the {@code CountsList} count narrowed to {@code int} when sparse, otherwise the
     *         value of {@code LongCountsBloomFilter.getMaxCount(key)}
     */
    public int getCount(long key) {
        if (isSparse()) {
            return (int) cachedKeys.getCount(key);
        } else {
            return filterCounts.getMaxCount(key);
        }
    }

    /** Increases the running total by one without touching the per-key counts. */
    public void incrementNumberOfCounts() {
        numberOfCounts++;
    }

    /**
     * Builds a new index containing the counts of this index and {@code second}. Neither input
     * is modified.
     *
     * <p>The result is sparse only when both inputs are sparse and their combined key-list size
     * is below {@link #getNumberOfValuesForSparse()}; otherwise it is dense. The result takes
     * {@code maxCounts} from this index and its total is the sum of both totals.
     *
     * @param second index to merge with this one
     * @return the merged, compacted index
     */
    public CompactCountsIndex mergeWith(CompactCountsIndex second) {
        CompactCountsIndex result;
        int mergedTotal = this.getNumberOfCounts() + second.getNumberOfCounts();
        long maxSizeForSparse = getNumberOfValuesForSparse();
        boolean makeSparse = this.isSparse() && second.isSparse()
                && this.getCachedKeys().size() + second.getCachedKeys().size() < maxSizeForSparse;
        if (makeSparse) {
            // Add to the list directly rather than through result.addCount(): that method also
            // bumps numberOfCounts, which would count second's contribution twice because
            // mergedTotal already includes it.
            CountsList mergedKeys = new CountsList(this.getCachedKeys());
            for (Pair<Long, Long> value : second.getCachedKeys()) {
                long key = value.getFirst();
                mergedKeys.addCount(key, value.getSecond().intValue());
            }
            result = new CompactCountsIndex(this.getMaxCounts(), mergedTotal);
            result.setCachedKeys(mergedKeys);
        } else {
            LongCountsBloomFilter mergedCounts;
            if (!this.isSparse() && !second.isSparse()) {
                mergedCounts = ApproximateCountsUtils.mergeBloomCountFilters(this.getFilterCounts(), second.getFilterCounts());
            } else if (!this.isSparse()) {
                mergedCounts = this.getFilterCounts().clone();
            } else if (!second.isSparse()) {
                mergedCounts = second.getFilterCounts().clone();
            } else {
                // Both sparse, but too many keys to stay sparse: start from an empty filter.
                mergedCounts = new LongCountsBloomFilter(this.getMaxCounts(), CompactCountsIndex.FPP);
            }
            // Whichever side is still sparse has not been folded into the filter yet.
            if (this.isSparse()) {
                for (Pair<Long, Long> value : this.getCachedKeys()) {
                    mergedCounts.addCount(value.getFirst(), value.getSecond().intValue());
                }
            }
            if (second.isSparse()) {
                for (Pair<Long, Long> value : second.getCachedKeys()) {
                    mergedCounts.addCount(value.getFirst(), value.getSecond().intValue());
                }
            }
            result = new CompactCountsIndex(this.getMaxCounts(), mergedCounts, mergedTotal);
        }
        result.compact();
        return result;
    }

    /** Compacts the key list when this index is sparse; does nothing when dense. */
    public void compact() {
        if (isSparse()) {
            getCachedKeys().compact();
        }
    }

    /**
     * @return one tenth of {@code maxCounts}, used by {@link #mergeWith(CompactCountsIndex)} as
     *         the size limit for keeping a merged index sparse; capped at
     *         {@link Integer#MAX_VALUE}
     */
    @JsonIgnore
    public int getNumberOfValuesForSparse() {
        // Divide in long arithmetic first: casting maxCounts to int before dividing would
        // truncate values above Integer.MAX_VALUE and could yield a negative threshold.
        long threshold = this.getMaxCounts() / 10;
        return (int) Math.min(Integer.MAX_VALUE, threshold);
    }

    /**
     * @return the running total, same as {@link #getNumberOfCounts()}
     */
    @JsonIgnore
    public int getTotal() {
        return getNumberOfCounts();
    }

    /**
     * Creates a copy with the same {@code maxCounts} and total, and a clone of whichever
     * representation is active.
     *
     * @return the copy
     */
    @Override
    public CompactCountsIndex clone() {
        CompactCountsIndex clone = new CompactCountsIndex(getMaxCounts(), getNumberOfCounts());
        if (isSparse()) {
            clone.setCachedKeys(getCachedKeys().clone());
        } else {
            clone.setFilterCounts(getFilterCounts().clone());
        }
        return clone;
    }
}