/**
* Copyright (C) 2009-2013 FoundationDB, LLC
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.foundationdb.server.store.statistics.histograms;
import com.foundationdb.util.ArgumentValidation;
import com.foundationdb.util.Flywheel;
import com.foundationdb.util.Recycler;
import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;
/**
 * Combines the SplitHandler (T visitor) with Buckets (T aggregator).
 *
 * <p>Every value passed to {@link #handle(int, Object, int)} is wrapped in a {@link Bucket} and offered to the
 * {@code BucketSampler} that belongs to the value's segment. Once {@link #finish()} has been called,
 * {@link #toBuckets()} reduces each segment's sampled buckets to its final bucket list.</p>
 *
 * @param <T> the thing to be sampled
 */
public class Sampler<T extends Comparable<? super T>> extends SplitHandler<T> {

    /**
     * Wraps one visited value in a bucket and offers it to the sampler of its segment.
     *
     * @param segmentIndex index into the per-segment sampler list
     * @param input the value that was visited
     * @param count the count passed to {@code Bucket.init} along with the value
     */
    @Override
    protected void handle(int segmentIndex, T input, int count) {
        BucketSampler<T> bucketSampler = bucketSamplerList.get(segmentIndex);
        Bucket<T> bucket = bucketsFlywheel.get();
        bucket.init(input, count);
        // A bucket the sampler did not accept is handed back to the flywheel (which also recycles its value).
        if (!bucketSampler.add(bucket)) {
            bucketsFlywheel.recycle(bucket);
        }
    }

    /**
     * Finishes the underlying split handler and marks this sampler as finished, which is the precondition
     * for {@link #toBuckets()}.
     */
    @Override
    public void finish() {
        super.finish();
        finished = true;
    }

    /**
     * Produces the final buckets, one list per per-segment sampler, in sampler order.
     *
     * <p>NOTE(review): the popularity split removes elements from the list returned by
     * {@code BucketSampler.buckets()}; if that is the sampler's live list, a second call to this method would
     * see different input -- confirm before calling it more than once.</p>
     *
     * @return for each segment, the merged list of buckets
     * @throws IllegalStateException if {@link #finish()} has not been called
     */
    public List<List<Bucket<T>>> toBuckets() {
        if (!finished) {
            throw new IllegalStateException("never called finish() after visiting");
        }
        List<PopularitySplit<T>> popularitySplits = splitStreamsByPopularity();
        return mergePopularitySplitStreams(popularitySplits);
    }

    /**
     * Applies {@link #splitByPopularity(BucketSampler)} to every per-segment sampler, preserving order.
     */
    private List<PopularitySplit<T>> splitStreamsByPopularity() {
        List<PopularitySplit<T>> results = new ArrayList<>(segments);
        for (BucketSampler<T> sampler : bucketSamplerList) {
            PopularitySplit<T> popularitySplit = splitByPopularity(sampler);
            results.add(popularitySplit);
        }
        return results;
    }

    /**
     * Partitions a sampler's buckets into "popular" and "regular" buckets and totals the counts each
     * partition represents.
     *
     * <p>Popular buckets are removed from the list returned by {@code sampler.buckets()}; what remains in that
     * list becomes the regular buckets of the returned split. Both partitions keep the list's iteration order.</p>
     *
     * @param sampler the per-segment sampler whose buckets are to be split
     * @return the two partitions together with their represented counts
     */
    private PopularitySplit<T> splitByPopularity(BucketSampler<T> sampler) {
        List<Bucket<T>> samples = sampler.buckets();
        Deque<Bucket<T>> popular = new ArrayDeque<>(samples.size());
        // Accumulate in long: sampleCount below is a long, and "int += long" narrows implicitly, which would
        // silently wrap (possibly to a negative value) once a segment represents more than Integer.MAX_VALUE rows.
        long popularsCount = 0;
        long regularsCount = 0;
        // a bucket is "exceptionally popular" if its popularity (equals-count) is at least one standard dev
        // above average (rounded). Also keep the first (min key) bucket.
        long popularityCutoff = Math.round(sampler.getEqualsMean() + sampler.getEqualsStdDev());
        for (Iterator<Bucket<T>> iter = samples.iterator(); iter.hasNext(); ) {
            Bucket<T> sample = iter.next();
            long sampleCount = sample.getEqualsCount() + sample.getLessThanCount();
            if (sample.isMinKeyBucket() || sample.getEqualsCount() >= popularityCutoff) {
                iter.remove();
                popular.add(sample);
                popularsCount += sampleCount;
            }
            else {
                regularsCount += sampleCount;
            }
        }
        return new PopularitySplit<>(regularsCount, samples, popularsCount, popular);
    }

    /**
     * Applies {@link #mergePopularitySplit(PopularitySplit)} to every split, preserving order.
     */
    private List<List<Bucket<T>>> mergePopularitySplitStreams(List<PopularitySplit<T>> popularitySplits) {
        List<List<Bucket<T>>> results = new ArrayList<>(popularitySplits.size());
        for (PopularitySplit<T> split : popularitySplits) {
            List<Bucket<T>> merged = mergePopularitySplit(split);
            results.add(merged);
        }
        return results;
    }

    /**
     * Recombines one split into a single bucket list, choosing the strategy by how many popular buckets
     * there are relative to {@code maxSize}.
     */
    private List<Bucket<T>> mergePopularitySplit(PopularitySplit<T> split) {
        // if populars.size() >= maxSize,
        //      we sample the populars, merging the unpopular buckets into the sampled populars as we go
        // if populars.size() < maxSize,
        //      we sample the unpopulars, appending the popular buckets into the sampling as we go
        return split.popularBuckets.size() >= maxSize
                ? mergeUnpopularsIntoPopulars(split)
                : mergePopularsIntoUnpopulars(split);
    }

    /**
     * Strategy for fewer than {@code maxSize} popular buckets: every popular bucket is appended to the result
     * unconditionally, and the regular buckets are sampled into the remaining {@code maxSize - populars.size()}
     * slots.
     *
     * <p>This consumes {@code split.popularBuckets}: popular buckets are removed from the deque as they are
     * appended ahead of the regular bucket they precede.</p>
     */
    private List<Bucket<T>> mergePopularsIntoUnpopulars(PopularitySplit<T> split) {
        Deque<Bucket<T>> populars = split.popularBuckets;
        assert populars.size() < maxSize : "failed populars.size[" + populars.size() + "] < maxSize[" + maxSize + "]";
        int unpopularsNeeded = maxSize - populars.size();
        BucketSampler<T> sampler = new BucketSampler<>(unpopularsNeeded, split.regularsCount, false);
        for (Bucket<T> regularBucket : split.regularBuckets) {
            assert !regularBucket.isMinKeyBucket(); // Min-key buckets were deemed popular
            T regularValue = regularBucket.value();
            // Flush every popular bucket that sorts before this regular bucket.
            while (!populars.isEmpty()) {
                Bucket<T> popularBucket = populars.getFirst();
                // The min-key bucket, by definition, comes before any bucket, including regularBucket.
                // But there is an explicit test for the min-key bucket to make it clear that it must be included.
                if (popularBucket.isMinKeyBucket() || popularBucket.value().compareTo(regularValue) < 0) {
                    sampler.appendToResults(populars.removeFirst()); // and the loop will try again
                }
                else {
                    break;
                }
            }
            sampler.add(regularBucket);
        }
        // Whatever is still queued was not less than any regular bucket; append it after them.
        for (Bucket<T> popularBucket : populars) {
            sampler.appendToResults(popularBucket);
        }
        return sampler.buckets();
    }

    /**
     * Strategy for at least {@code maxSize} popular buckets: the popular buckets are sampled, and the counts of
     * the regular buckets are folded into the popular buckets that were kept.
     *
     * <p>Regular buckets left over after the last kept popular bucket are folded into one another, and the
     * final one (carrying the accumulated counts) is appended as the last result.</p>
     */
    private List<Bucket<T>> mergeUnpopularsIntoPopulars(PopularitySplit<T> split) {
        Deque<Bucket<T>> populars = split.popularBuckets;
        assert populars.size() >= maxSize : "failed populars.size[" + populars.size() + "] >= maxSize[" + maxSize + "]";
        PeekingIterator<Bucket<T>> unpopulars = Iterators.peekingIterator(split.regularBuckets.iterator());
        List<Bucket<T>> results = new ArrayList<>(populars.size());
        // The sampler is used only to decide which popular buckets are kept; results are collected separately.
        BucketSampler<T> sampler = new BucketSampler<>(maxSize, split.popularsCount, false);
        for (Bucket<T> popular : populars) {
            if (sampler.add(popular)) {
                // merge in all the unpopulars that are not greater than this one
                while (unpopulars.hasNext() && unpopulars.peek().value().compareTo(popular.value()) <= 0) {
                    Bucket<T> mergeMe = unpopulars.next();
                    mergeUp(mergeMe, popular);
                }
                results.add(popular);
            }
        }
        // now, create one last value which merges in all of the remaining unpopulars
        Bucket<T> last = null;
        while (unpopulars.hasNext()) {
            Bucket<T> unpopular = unpopulars.next();
            if (last != null) {
                mergeUp(last, unpopular);
            }
            last = unpopular;
        }
        if (last != null) {
            results.add(last);
        }
        return results;
    }

    /**
     * Folds the counts of {@code from} into {@code into}, which is expected to sort after it.
     *
     * <p>{@code into} gains {@code from}'s less-than-distincts plus one (presumably for {@code from}'s own key)
     * as less-than-distincts, and {@code from}'s less-than count plus equals count as less-thans.
     * {@code from} itself is not modified.</p>
     */
    private void mergeUp(Bucket<T> from, Bucket<T> into) {
        into.addLessThanDistincts(from.getLessThanDistinctsCount() + 1);
        into.addLessThans(from.getLessThanCount() + from.getEqualsCount());
    }

    /**
     * Creates a sampler with one {@code BucketSampler} per segment of the given splitter.
     *
     * <p>Each per-segment sampler is sized to {@code maxSize * OVERSAMPLE_FACTOR}; the reduction to
     * {@code maxSize} happens in {@link #toBuckets()}.</p>
     *
     * @param splitter supplies the number of segments and is passed to the superclass
     * @param maxSize the bucket budget per segment used when merging in {@link #toBuckets()}
     * @param estimatedInputs the input estimate passed to every per-segment sampler
     * @param recycler receives the value of every bucket that is recycled
     */
    public Sampler(Splitter<T> splitter, int maxSize, long estimatedInputs, Recycler<? super T> recycler) {
        super(splitter);
        int segments = splitter.segments();
        ArgumentValidation.isGT("segments", segments, 0);
        bucketSamplerList = new ArrayList<>(segments);
        this.maxSize = maxSize;
        int oversampleSize = maxSize * OVERSAMPLE_FACTOR;
        for (int i = 0; i < segments; ++i) {
            bucketSamplerList.add(new BucketSampler<T>(oversampleSize, estimatedInputs));
        }
        this.segments = segments;
        this.bucketsFlywheel = new BucketFlywheel<>(oversampleSize, segments, recycler);
    }

    // One sampler per segment, indexed by segment index.
    private final List<BucketSampler<T>> bucketSamplerList;
    private final int segments;
    private final int maxSize;
    // Set by finish(); toBuckets() refuses to run until then.
    private boolean finished = false;
    private final Flywheel<Bucket<T>> bucketsFlywheel;

    /** Factor by which each per-segment sampler is oversized relative to {@code maxSize}. */
    public static final int OVERSAMPLE_FACTOR = 50;

    /**
     * Result of splitting one segment's buckets into regular and popular buckets, with the total count
     * each partition represents.
     */
    private static class PopularitySplit<T> {
        private PopularitySplit(long regularsCount, List<Bucket<T>> regularBuckets,
                                long popularsCount, Deque<Bucket<T>> popularBuckets
        ) {
            this.regularBuckets = regularBuckets;
            this.popularBuckets = popularBuckets;
            this.popularsCount = popularsCount;
            this.regularsCount = regularsCount;
        }

        private final List<Bucket<T>> regularBuckets;
        private final Deque<Bucket<T>> popularBuckets;
        // Sums of (equals count + less-than count) over the respective buckets; long so they cannot wrap.
        private final long popularsCount;
        private final long regularsCount;
    }

    /**
     * Flywheel of buckets that also forwards the value of every recycled bucket to a value recycler.
     */
    private static class BucketFlywheel<T> extends Flywheel<Bucket<T>> {
        @Override
        protected Bucket<T> createNew() {
            ++created;
            // SpatialQueryDT fails here. Probably because the estimate of index row count, which is the table row
            // count, is so far off for a z-order index of boxes. mmcm suggests disabling the assertion.
            // assert created <= createdLimit : created + " > " + createdLimit;
            return new Bucket<>();
        }

        @Override
        public void recycle(Bucket<T> element) {
            super.recycle(element);
            valueRecycler.recycle(element.value());
        }

        private BucketFlywheel(int maxSize, int segments, Recycler<? super T> valueRecycler) {
            this.createdLimit = (maxSize + 1) * segments;
            this.valueRecycler = valueRecycler;
        }

        private final Recycler<? super T> valueRecycler;
        // Only referenced by the disabled assertion in createNew().
        private final int createdLimit;
        private int created;
    }
}