package net.objectlab.kit.util;

import java.math.BigDecimal;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Very basic distribution count per bucket. It is not meant to handle extremely large datasets and the calculation
 * is quite basic and inefficient at the moment (linear scan per point).
 *
 * Given a list of bucket upper limits, the distribution will count the number of occurrences.
 * If a value is &lt;= bucket (ordered) then the count is incremented.
 * If the value is &gt; than every upper limit then the count is incremented for a catch-all upper-limit bucket.
 */
public class FrequencyBucketDistribution {
    /** Buckets sorted ascending by limit, with the catch-all (null-limit) bucket last; empty if built with null. */
    private final List<Bucket> orderedBuckets;

    /** A single bucket: an upper limit (null means the catch-all top bucket) and its occurrence count. */
    public static class Bucket {
        private final BigDecimal bucket;
        private long count;

        public Bucket(final BigDecimal bucket) {
            this.bucket = bucket;
        }

        public void incrementCount() {
            count++;
        }

        public BigDecimal getBucket() {
            return bucket;
        }

        public long getCount() {
            return count;
        }

        /** @return true for the catch-all bucket that receives values greater than every defined limit. */
        public boolean isUpperLimit() {
            return bucket == null;
        }
    }

    /**
     * Anything &lt;= smallest bucket goes into that bucket. If the value to classify is &gt; last bucket,
     * then it is put in the catch-all upper-limit bucket.
     * @param buckets the bucket upper limits (may be unsorted; a null list creates a distribution with no buckets,
     *                in which case added points are ignored)
     */
    public FrequencyBucketDistribution(final List<BigDecimal> buckets) {
        if (buckets != null) {
            orderedBuckets = buckets.stream()
                    .map(Bucket::new)
                    .sorted((o1, o2) -> o1.bucket.compareTo(o2.bucket))
                    .collect(Collectors.toList());
            orderedBuckets.add(new Bucket(null)); // catch-all upper limit, always last
        } else {
            orderedBuckets = Collections.emptyList();
        }
    }

    /**
     * Classifies a point: increments the first bucket whose limit is &gt;= point, otherwise the
     * catch-all upper-limit bucket.
     * <p>
     * Points are silently ignored when the distribution was constructed with a null bucket list
     * (previously this threw an {@link IndexOutOfBoundsException} via {@code get(-1)} on the empty list).
     * @param point the value to classify
     */
    public void addPoint(final BigDecimal point) {
        if (orderedBuckets.isEmpty()) {
            return; // constructed with null buckets: nothing to count into
        }
        for (final Bucket b : orderedBuckets) {
            if (!b.isUpperLimit() && BigDecimalUtil.compareTo(point, b.getBucket()) <= 0) {
                b.incrementCount();
                return;
            }
        }
        // Point exceeds every defined limit: count it in the catch-all bucket (always last).
        orderedBuckets.get(orderedBuckets.size() - 1).incrementCount();
    }

    /** @return the internal list of buckets, sorted ascending by limit with the catch-all bucket last. */
    public List<Bucket> getDistribution() {
        return orderedBuckets;
    }
}