package net.objectlab.kit.util;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Very basic distribution count per bucket. It is not meant to handle extremely large datasets:
 * every call to {@link #addPoint(BigDecimal)} scans the buckets linearly.
 *
 * <p>Given a list of bucket upper limits, the distribution counts the number of occurrences per bucket.
 * A value is counted in the first bucket (in ascending order of limit) for which {@code value <= limit}.
 * A value greater than every limit is counted in a trailing catch-all bucket, for which
 * {@link Bucket#isUpperLimit()} returns {@code true}.
 *
 * <p>This class performs no synchronization.
 */
public class FrequencyBucketDistribution {
    /** Buckets in ascending order of limit; the last element is always the catch-all bucket. */
    private final List<Bucket> orderedBuckets;

    /**
     * A single bucket: an inclusive upper limit together with the number of points counted in it.
     * A bucket whose limit is {@code null} is the catch-all for values above every other limit.
     */
    public static class Bucket {
        private final BigDecimal bucket;
        private long count;

        /**
         * @param bucket the inclusive upper limit of this bucket, or {@code null} for the catch-all bucket
         */
        public Bucket(final BigDecimal bucket) {
            this.bucket = bucket;
        }

        /** Adds one to the number of points counted in this bucket. */
        public void incrementCount() {
            count++;
        }

        /**
         * @return the inclusive upper limit of this bucket, or {@code null} for the catch-all bucket
         */
        public BigDecimal getBucket() {
            return bucket;
        }

        /**
         * @return the number of points counted in this bucket so far
         */
        public long getCount() {
            return count;
        }

        /**
         * @return {@code true} if this bucket has no limit, i.e. it is the catch-all bucket
         */
        public boolean isUpperLimit() {
            return bucket == null;
        }
    }

    /**
     * Builds the distribution from the given upper limits, sorted in ascending order, and appends a
     * catch-all bucket for values greater than the largest limit.
     *
     * <p>Anything {@code <=} the smallest limit goes into the first bucket. A value that is {@code >}
     * the largest limit is counted in the catch-all bucket, not in the bucket of the largest limit.
     *
     * @param buckets the upper limits, in any order; {@code null} entries are skipped, and a
     *        {@code null} or empty list yields a distribution containing only the catch-all bucket
     */
    public FrequencyBucketDistribution(final List<BigDecimal> buckets) {
        final List<Bucket> ordered;
        if (buckets == null) {
            ordered = new ArrayList<>();
        } else {
            // Collect into an explicit ArrayList: Collectors.toList() does not guarantee a mutable
            // result, and the catch-all bucket still has to be appended below.
            ordered = buckets.stream()
                    .filter(limit -> limit != null) // a null limit is reserved for the catch-all bucket
                    .sorted()
                    .map(Bucket::new)
                    .collect(Collectors.toCollection(ArrayList::new));
        }
        // Always present, so addPoint can rely on a last element even when no limits were supplied.
        ordered.add(new Bucket(null));
        orderedBuckets = ordered;
    }

    /**
     * Counts the point in the first bucket whose limit is {@code >=} the point, or in the catch-all
     * bucket when the point is greater than every limit.
     *
     * @param point the value to classify; the ordering (including the treatment of {@code null}) is
     *        whatever {@code BigDecimalUtil.compareTo} defines
     */
    public void addPoint(final BigDecimal point) {
        for (final Bucket b : orderedBuckets) {
            if (!b.isUpperLimit() && BigDecimalUtil.compareTo(point, b.getBucket()) <= 0) {
                b.incrementCount();
                return;
            }
        }
        // No limit was large enough: the catch-all bucket is always the last element.
        orderedBuckets.get(orderedBuckets.size() - 1).incrementCount();
    }

    /**
     * @return an unmodifiable view of the buckets in ascending order of limit, ending with the
     *         catch-all bucket; the view reflects points added later
     */
    public List<Bucket> getDistribution() {
        return Collections.unmodifiableList(orderedBuckets);
    }
}