/** * Copyright (C) 2009-2013 FoundationDB, LLC * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.foundationdb.server.store.statistics.histograms; import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; import java.util.ArrayList; import java.util.List; final class BucketSampler<T> { public boolean add(Bucket<T> bucket) { long bucketEqualsCount = bucket.getEqualsCount(); long bucketsRepresented = (bucketEqualsCount + bucket.getLessThanCount()); inputsCount += bucketsRepresented; // Form a bucket if: // 1) we've crossed a median point, // 2) we're at the end, or // 3) we're at the beginning (see bug 1052606) boolean insertIntoResults = false; if (buckets.isEmpty()) { // Bucket with min value of index bucket.markMinKeyBucket(); insertIntoResults = true; // We want to keep the min-value bucket no matter what. If we were going to keep it anyway, due to // crossing a median point, then our median markers are OK as is. Otherwise, they need to be recomputed. if (inputsCount >= nextMedianPoint) { while (inputsCount >= nextMedianPoint) { nextMedianPoint += medianPointDistance; } } else { computeMedianPointBoundaries(maxSize - 1); } } else if (inputsCount == estimatedInputs) { // end insertIntoResults = true; } else { // Did we cross a median point? while (inputsCount >= nextMedianPoint) { insertIntoResults = true; nextMedianPoint += medianPointDistance; } } if (insertIntoResults) { appendToResults(bucket); } else { runningLessThans += bucketsRepresented; runningLessThanDistincts += bucket.getLessThanDistinctsCount() + 1; } // stats if (stdDev != null) stdDev.increment(bucketEqualsCount); ++bucketsSeen; equalsSeen += bucketEqualsCount; return insertIntoResults; } public void appendToResults(Bucket<T> bucket) { bucket.addLessThanDistincts(runningLessThanDistincts); bucket.addLessThans(runningLessThans); buckets.add(bucket); runningLessThanDistincts = 0; runningLessThans = 0; } List<Bucket<T>> buckets() { return buckets; } public double getEqualsStdDev() { if (stdDev == null) throw new IllegalStateException("standard deviation not computed"); return stdDev.getResult(); } public double getEqualsMean() { return ((double)equalsSeen) / ((double)bucketsSeen); } BucketSampler(int bucketCount, long estimatedInputs) { this(bucketCount, estimatedInputs, true); } BucketSampler(int maxSize, long estimatedInputs, boolean calculateStandardDeviation) { if (maxSize < 1) throw new IllegalArgumentException("max must be at least 1"); if (estimatedInputs < 0) throw new IllegalArgumentException("estimatedInputs must be non-negative: " + estimatedInputs); this.maxSize = maxSize; this.estimatedInputs = estimatedInputs; this.buckets = new ArrayList<>(maxSize + 1); this.stdDev = calculateStandardDeviation ? new StandardDeviation() : null; computeMedianPointBoundaries(maxSize); } private void computeMedianPointBoundaries(int maxSize) { double medianPointDistance = ((double)estimatedInputs) / maxSize; this.medianPointDistance = medianPointDistance == 0 ? 1 : medianPointDistance; this.nextMedianPoint = this.medianPointDistance; assert this.nextMedianPoint > 0 : this.nextMedianPoint; } private final int maxSize; private final long estimatedInputs; private double medianPointDistance; private StandardDeviation stdDev; private double nextMedianPoint; private long inputsCount; private long runningLessThans; private long runningLessThanDistincts; private long bucketsSeen; private long equalsSeen; private final List<Bucket<T>> buckets; }