/*
* Copyright (C) 2012 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.stats.cardinality;
import com.google.common.base.Throwables;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.Random;
import static java.lang.String.format;
/**
 * Command-line benchmark that feeds the same stream of random {@code long} values into an
 * {@link AdaptiveHyperLogLog} and a {@link HyperLogLog} with the same bucket count and prints,
 * at exponentially spaced checkpoints, both estimates and their relative errors, the adaptive
 * estimator's size figures, the average cost of {@code add} on the adaptive estimator, and the
 * size and timing of serializing the adaptive estimator through {@link HyperLogLogCodec}.
 */
public class BenchmarkAdaptiveHyperLogLog {
  /** Number of encode (or decode) calls executed for every serialization timing sample. */
  private static final int COMPRESSION_LOOPS = 10000;

  /** Number of leading calls in each timing sample that are executed but not measured. */
  private static final int COMPRESSION_WARM_LOOPS = 1000;

  /** Number of calls in each timing sample that contribute to the reported average. */
  private static final int COMPRESSION_TIMED_LOOPS = COMPRESSION_LOOPS - COMPRESSION_WARM_LOOPS;

  /** A progress line is refreshed every time this many values have been added. */
  private static final long PROGRESS_INTERVAL = 5000000;

  /**
   * Runs a short unreported pass (1024 buckets, 2^20 values) to warm up, then reported passes
   * of 2^30 values each for 1024, 2048 and 4096 buckets.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    System.out.println("Warming up...");
    System.out.println();
    benchmark(1024, (1L << 20), false);

    System.out.println("Benchmarking...");
    System.out.println();
    long count = (1L << 30);
    benchmark(1024, count, true);
    benchmark(2048, count, true);
    benchmark(4096, count, true);
  }

  /**
   * Adds {@code count} random values to a fixed and an adaptive estimator and prints statistics.
   *
   * <p>Only the calls to {@code adaptiveEstimator.add} are included in the ns/add and add/s
   * figures. When {@code report} is true a statistics row is made permanent (terminated with a
   * newline) at i = 1, 2, 4, 8, ...; independently of {@code report}, the current row is
   * rewritten in place (carriage return, no newline) every {@link #PROGRESS_INTERVAL} values.
   *
   * @param buckets number of buckets passed to both estimators' constructors
   * @param count number of random values to add
   * @param report whether to print the table header and the permanent checkpoint rows
   */
  private static void benchmark(int buckets, long count, boolean report) {
    if (report) {
      printHeader(buckets);
    }

    HyperLogLog fixedEstimator = new HyperLogLog(buckets);
    AdaptiveHyperLogLog adaptiveEstimator = new AdaptiveHyperLogLog(buckets);
    HyperLogLogCodec codec = new HyperLogLogCodec();
    Random random = new Random();

    long reportInterval = 1;
    long nanos = 0;

    for (long i = 1; i <= count; ++i) {
      long value = random.nextLong();

      // Time the adaptive add only; the fixed estimator is updated outside the timed region.
      long start = System.nanoTime();
      adaptiveEstimator.add(value);
      nanos += System.nanoTime() - start;

      fixedEstimator.add(value);

      // Same grouping as the unparenthesized "report && a || b": && binds tighter than ||.
      boolean checkpoint = report && i % reportInterval == 0;
      if (checkpoint || i % PROGRESS_INTERVAL == 0) {
        long adaptiveEstimate = adaptiveEstimator.estimate();
        double adaptiveError = (adaptiveEstimate - i) * 100.0 / i;

        long fixedEstimate = fixedEstimator.estimate();
        double fixedError = (fixedEstimate - i) * 100.0 / i;

        int encodeSize = encodeSize(codec, adaptiveEstimator);
        double encodeMs = timeEncode(codec, adaptiveEstimator);
        double decodeMs = timeDecode(codec, adaptiveEstimator);

        System.out.print(
          format(
            "\r(%3d%%) %11d | %11d %7.2f | %11d %7.2f | %11d %7.2f | %6d %7d %7.2f | %6d %10.2f | %5d %5.4f %5.4f",
            i * 100 / count,
            i,
            adaptiveEstimate,
            adaptiveError,
            fixedEstimate,
            fixedError,
            adaptiveEstimate - fixedEstimate,
            Math.abs(adaptiveError) - Math.abs(fixedError),
            adaptiveEstimator.getSizeInBytes(),
            Utils.entropy(Utils.histogram(adaptiveEstimator.buckets())) / 8,
            adaptiveEstimator.getSizeInBytes() * 1.0 / i,
            nanos / i,
            i / (nanos / 1.0e9),
            encodeSize,
            encodeMs,
            decodeMs
          )
        );

        if (checkpoint) {
          // Keep this row on screen and double the distance to the next permanent row.
          System.out.println();
          reportInterval *= 2;
        }
      }
    }

    if (report) {
      System.out.println();
      System.out.println();
    }
  }

  /**
   * Prints the section title, which includes 1.04 / sqrt(buckets) expressed as a percentage,
   * followed by the two column-header lines of the statistics table.
   *
   * @param buckets bucket count shown in the title and used for the error figure
   */
  private static void printHeader(int buckets) {
    System.out.println(
      format(
        "-- %s buckets (%.2f%% error)", buckets, 100 * 1.04 / Math.sqrt(
          buckets
        )
      )
    );
    System.out.println();
    System.out.println(
      " | adaptive | fixed | delta fixed vs adapt.| size (bytes) | | serialization "
    );
    System.out.println(
      " actual | estimate error % | estimate error % | count error % | actual entropy mean | ns/add add/s | bytes enc ms dec ms"
    );
  }

  /**
   * Returns the number of bytes {@code codec} writes when encoding {@code hyperLogLog}.
   *
   * @param codec codec used for encoding
   * @param hyperLogLog estimator to encode
   * @return length of the encoded byte array
   */
  private static int encodeSize(HyperLogLogCodec codec, AdaptiveHyperLogLog hyperLogLog) {
    try {
      return encode(codec, hyperLogLog).length;
    } catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

  /**
   * Measures the average time of one encode call. The call is executed
   * {@link #COMPRESSION_LOOPS} times; the first {@link #COMPRESSION_WARM_LOOPS} executions are
   * discarded and the rest are averaged. Allocation of the output buffer is not timed.
   *
   * @param codec codec used for encoding
   * @param hyperLogLog estimator to encode
   * @return average milliseconds per measured encode call
   */
  private static double timeEncode(HyperLogLogCodec codec, AdaptiveHyperLogLog hyperLogLog) {
    try {
      int buckets = hyperLogLog.buckets().length;
      long encodeTime = 0;
      for (int i = 0; i < COMPRESSION_LOOPS; i++) {
        ByteArrayOutputStream out = new ByteArrayOutputStream(buckets);
        long startTime = System.nanoTime();
        codec.encodeAdaptiveHyperLogLog(hyperLogLog, out);
        long delta = System.nanoTime() - startTime;
        // ">=" so that exactly COMPRESSION_TIMED_LOOPS samples are summed, matching the divisor.
        if (i >= COMPRESSION_WARM_LOOPS) {
          encodeTime += delta;
        }
      }
      return averageMillis(encodeTime);
    } catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

  /**
   * Measures the average time of one decode call. The estimator is encoded once, then the
   * resulting bytes are decoded {@link #COMPRESSION_LOOPS} times; the first
   * {@link #COMPRESSION_WARM_LOOPS} executions are discarded and the rest are averaged. Creation
   * of the input stream wrappers is part of the timed region.
   *
   * @param codec codec used for encoding and decoding
   * @param hyperLogLog estimator whose encoded form is decoded
   * @return average milliseconds per measured decode call
   */
  private static double timeDecode(HyperLogLogCodec codec, AdaptiveHyperLogLog hyperLogLog) {
    try {
      byte[] buf = encode(codec, hyperLogLog);
      long decodeTime = 0;
      for (int i = 0; i < COMPRESSION_LOOPS; i++) {
        long startTime = System.nanoTime();
        codec.decodeAdaptiveHyperLogLog(new DataInputStream(new ByteArrayInputStream(buf)));
        long delta = System.nanoTime() - startTime;
        // ">=" so that exactly COMPRESSION_TIMED_LOOPS samples are summed, matching the divisor.
        if (i >= COMPRESSION_WARM_LOOPS) {
          decodeTime += delta;
        }
      }
      return averageMillis(decodeTime);
    } catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

  /**
   * Encodes {@code hyperLogLog} into a fresh byte array, using the length of its bucket array
   * as the initial buffer capacity.
   *
   * @param codec codec used for encoding
   * @param hyperLogLog estimator to encode
   * @return the encoded bytes
   * @throws IOException if the codec fails while writing
   */
  private static byte[] encode(HyperLogLogCodec codec, AdaptiveHyperLogLog hyperLogLog)
      throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream(hyperLogLog.buckets().length);
    codec.encodeAdaptiveHyperLogLog(hyperLogLog, out);
    return out.toByteArray();
  }

  /**
   * Converts the nanoseconds summed over the measured iterations into average milliseconds
   * per iteration.
   *
   * @param totalNanos sum of the measured iterations' durations, in nanoseconds
   * @return {@code totalNanos} in milliseconds divided by {@link #COMPRESSION_TIMED_LOOPS}
   */
  private static double averageMillis(long totalNanos) {
    return totalNanos / 1.0e6 / COMPRESSION_TIMED_LOOPS;
  }
}