/*
* Copyright (C) 2012 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.stats.cardinality;
import com.google.common.base.Preconditions;
import javax.annotation.concurrent.NotThreadSafe;
import static com.facebook.stats.cardinality.BucketAndHash.fromHash;
import static com.facebook.stats.cardinality.HyperLogLogUtil.computeHash;
import static com.google.common.base.Preconditions.checkArgument;
/**
 * Cardinality sketch that delegates to an {@code Estimator} and swaps the backing
 * implementation between {@code SparseEstimator} and {@code DenseEstimator}.
 *
 * <p>An instance created from a bucket count starts out sparse. It is replaced by a dense
 * estimator during {@link #add(long)} once the sparse estimator reports a size that is no
 * smaller than the dense estimate, or once a value to be stored reaches
 * {@code SparseEstimator.MAX_BUCKET_VALUE}.
 */
@NotThreadSafe
public class AdaptiveHyperLogLog {
  private static final int INSTANCE_SIZE = UnsafeUtil.sizeOf(AdaptiveHyperLogLog.class);

  // Current backing estimator; reassigned by add() and merge().
  private Estimator estimator;

  /**
   * Creates an empty instance backed by a {@code SparseEstimator}.
   *
   * @param numberOfBuckets number of buckets; must be a power of 2
   * @throws IllegalArgumentException if {@code numberOfBuckets} is not a power of 2
   */
  public AdaptiveHyperLogLog(int numberOfBuckets) {
    checkArgument(Numbers.isPowerOf2(numberOfBuckets), "numberOfBuckets must be a power of 2");
    estimator = new SparseEstimator(numberOfBuckets);
  }

  /**
   * Creates an instance from existing bucket values, choosing the sparse or dense
   * representation via {@link #makeEstimator(int[])}.
   *
   * @param buckets bucket values; the array length must be a power of 2
   * @throws IllegalArgumentException if {@code buckets.length} is not a power of 2
   */
  public AdaptiveHyperLogLog(int[] buckets) {
    Preconditions.checkArgument(
        Numbers.isPowerOf2(buckets.length),
        "numberOfBuckets must be a power of 2"
    );
    this.estimator = makeEstimator(buckets);
  }

  /**
   * Hashes {@code value} and offers the resulting bit position to the bucket selected by the
   * hash, switching to a dense estimator first when required.
   *
   * @param value the value to add
   * @return true if the estimation was affected by this addition
   */
  public boolean add(long value) {
    BucketAndHash hashed = fromHash(computeHash(value), estimator.getNumberOfBuckets());
    // 1-based position of the lowest set bit of the hash.
    int bitPosition = Long.numberOfTrailingZeros(hashed.getHash()) + 1;

    if (shouldSwitchToDense(bitPosition)) {
      estimator = new DenseEstimator(estimator.buckets());
    }

    return estimator.setIfGreater(hashed.getBucket(), bitPosition);
  }

  /**
   * Decides whether the current estimator must be replaced by a dense one before storing
   * {@code bitPosition}.
   *
   * @param bitPosition the value about to be offered to a bucket
   * @return true only when the current estimator is exactly a {@code SparseEstimator} and either
   *     its reported size has reached the dense size estimate or {@code bitPosition} has reached
   *     {@code SparseEstimator.MAX_BUCKET_VALUE}
   */
  private boolean shouldSwitchToDense(int bitPosition) {
    if (estimator.getClass() != SparseEstimator.class) {
      return false;
    }
    if (estimator.estimateSizeInBytes()
        >= DenseEstimator.estimateSizeInBytes(estimator.getNumberOfBuckets())) {
      return true;
    }
    return bitPosition >= SparseEstimator.MAX_BUCKET_VALUE;
  }

  /**
   * @return the estimate reported by the current backing estimator
   */
  public long estimate() {
    return estimator.estimate();
  }

  /**
   * @return the backing estimator's estimated size in bytes plus the size of this wrapper
   *     instance
   */
  public int getSizeInBytes() {
    return INSTANCE_SIZE + estimator.estimateSizeInBytes();
  }

  /**
   * @return the number of buckets reported by the backing estimator
   */
  public int getNumberOfBuckets() {
    return estimator.getNumberOfBuckets();
  }

  /**
   * @return the bucket values as returned by the backing estimator
   */
  public int[] buckets() {
    return estimator.buckets();
  }

  /**
   * Merges {@code other} into this instance: the buckets of both are combined with
   * {@code HyperLogLogUtil.mergeBuckets} and the backing estimator is rebuilt from the result.
   *
   * @param other the instance whose buckets are merged into this one
   */
  public void merge(AdaptiveHyperLogLog other) {
    int[] combined = HyperLogLogUtil.mergeBuckets(buckets(), other.buckets());
    estimator = makeEstimator(combined);
  }

  /**
   * Builds a new instance from the buckets of {@code first} and {@code second} combined with
   * {@code HyperLogLogUtil.mergeBuckets}.
   *
   * @param first one input
   * @param second the other input
   * @return a new instance constructed from the merged buckets
   */
  public static AdaptiveHyperLogLog merge(AdaptiveHyperLogLog first, AdaptiveHyperLogLog second) {
    int[] combined = HyperLogLogUtil.mergeBuckets(first.buckets(), second.buckets());
    return new AdaptiveHyperLogLog(combined);
  }

  /**
   * Picks the estimator implementation for the given bucket values.
   *
   * <p>A {@code SparseEstimator} is used only when every bucket value is below
   * {@code SparseEstimator.MAX_BUCKET_VALUE} and the sparse size estimate for the number of
   * non-zero buckets is strictly smaller than the dense size estimate; otherwise a
   * {@code DenseEstimator} is used.
   *
   * @param buckets bucket values to wrap
   * @return the estimator chosen for {@code buckets}
   */
  private static Estimator makeEstimator(int[] buckets) {
    int largest = 0;
    int populated = 0;
    for (int i = 0; i < buckets.length; i++) {
      int current = buckets[i];
      if (current > largest) {
        largest = current;
      }
      if (current > 0) {
        populated++;
      }
    }

    if (largest >= SparseEstimator.MAX_BUCKET_VALUE) {
      return new DenseEstimator(buckets);
    }
    if (SparseEstimator.estimateSizeInBytes(populated, buckets.length)
        < DenseEstimator.estimateSizeInBytes(buckets.length)) {
      return new SparseEstimator(buckets);
    }
    return new DenseEstimator(buckets);
  }
}