/*
 * Copyright (C) 2012 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.stats.cardinality;

import org.testng.annotations.Test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.IOException;

import static org.testng.Assert.assertEquals;

/**
 * Manual speed and size benchmark for {@link HyperLogLogCodec}.
 *
 * <p>For several bucket counts it fills a {@link HyperLogLog} with an increasing number of
 * values, round-trips it through the codec, checks that the decoded buckets match the
 * original ones, and prints one table row per cardinality with the estimate error, the
 * encoded size and the average encode/decode time.
 *
 * <p>The TestNG entry point is declared with {@code enabled = false}, so it only runs when
 * switched on explicitly.
 */
public class TestHyperLogLogCodecSpeed {
  /** Total number of timed iterations per measurement, including the discarded ones. */
  private static final int LOOPS = 10000;

  /** Number of leading iterations whose timings are discarded. */
  private static final int WARM_LOOPS = 1000;

  /**
   * Benchmark entry point: runs {@link #warm()} once and then the benchmark for
   * {@code log2m} values 10, 11 and 12.
   *
   * @throws Exception if encoding or decoding fails
   */
  @Test(groups = "slow", enabled = false)
  public void testHyperLogLog() throws Exception {
    warm();
    testHyperLogLog(10);
    testHyperLogLog(11);
    testHyperLogLog(12);
  }

  /**
   * Prints the benchmark table for a {@link HyperLogLog} built with {@code 1 << log2m} buckets.
   *
   * <p>The same instance is grown incrementally: for each {@code size} in 1, 2, 4, ... (while
   * below 200,000,000) the values {@code 0 .. size - 1} have been added, and one row is
   * printed via {@link #testBytes}.
   *
   * @param log2m base-2 logarithm of the number of buckets
   * @throws Exception if encoding or decoding fails
   */
  public void testHyperLogLog(int log2m) throws Exception {
    System.out.printf(
        "%11s %11s %6s %4s %4s %4s %9s %6s %6s\n",
        "actual",
        "estimate",
        "error",
        "in",
        "out",
        "ent",
        "bits/Byte",
        "enc ms",
        "dec ms"
    );

    HyperLogLog hyperLogLog = new HyperLogLog(1 << log2m);
    long currentSize = 0;
    for (long size = 1; size < 200000000; size <<= 1) {
      // Only add the values that are new since the previous (half-sized) step.
      for (; currentSize < size; currentSize++) {
        hyperLogLog.add(currentSize);
      }

      long estimate = hyperLogLog.estimate();
      double err = Math.abs(estimate - size) / (double) size;
      int entropy = Utils.entropy(Utils.histogram(hyperLogLog.buckets())) / 8;

      testBytes(hyperLogLog, log2m, size, estimate, err, entropy);
    }
    System.out.println();
  }

  /**
   * Round-trips {@code hyperLogLog} through a fresh {@link HyperLogLogCodec}, asserts that the
   * decoded buckets equal the original ones, times encode and decode, and prints one table row.
   *
   * @param hyperLogLog the instance to encode
   * @param log2m base-2 logarithm of the bucket count; {@code 1 << log2m} is printed as the
   *     input size and used to size the output buffer
   * @param size number of distinct values that were added (printed as "actual")
   * @param estimate cardinality estimate to print
   * @param err relative estimate error to print
   * @param entropy entropy figure to print in the "ent" column
   * @throws IOException if encoding or decoding fails
   */
  public static void testBytes(
      HyperLogLog hyperLogLog,
      int log2m,
      long size,
      long estimate,
      double err,
      int entropy
  ) throws IOException {
    int buckets = 1 << log2m;
    HyperLogLogCodec codec = new HyperLogLogCodec();

    // encode
    ByteArrayOutputStream out = new ByteArrayOutputStream(buckets);
    codec.encodeHyperLogLog(hyperLogLog, out);
    byte[] compressed = out.toByteArray();

    // decode
    HyperLogLog hyperLogLogNew = codec.decodeHyperLogLog(new ByteArrayInputStream(compressed));

    // verify results
    assertEquals(hyperLogLog.buckets(), hyperLogLogNew.buckets());

    // time encode and decode
    double encodeMs = timeEncode(codec, hyperLogLog);
    double decodeMs = timeDecode(codec, compressed);

    // print info: encoded bits per input bucket
    double bitsPerByte = compressed.length * 8.0 / buckets;
    System.out.printf(
        "%11d %11d %5.4f %4d %4d %4d %5.4f %5.4f %5.4f\n",
        size,
        estimate,
        err,
        buckets,
        compressed.length,
        entropy,
        bitsPerByte,
        encodeMs,
        decodeMs
    );
  }

  /**
   * Runs one full encode and decode timing pass on a 2048-bucket {@link HyperLogLog} holding
   * the values {@code 0 .. 99999} and prints both averages. Called before the real
   * measurements, presumably so that JIT compilation of the codec does not distort them.
   *
   * @throws IOException if encoding or decoding fails
   */
  private static void warm() throws IOException {
    int bucketCount = 1 << 11;
    HyperLogLog hyperLogLog = new HyperLogLog(bucketCount);
    for (long i = 0; i < 100000; i++) {
      hyperLogLog.add(i);
    }

    HyperLogLogCodec codec = new HyperLogLogCodec();

    double encodeMs = timeEncode(codec, hyperLogLog);
    System.out.printf("encode %5.4f\n", encodeMs);

    ByteArrayOutputStream out = new ByteArrayOutputStream(bucketCount);
    codec.encodeHyperLogLog(hyperLogLog, out);
    byte[] buf = out.toByteArray();

    double decodeMs = timeDecode(codec, buf);
    System.out.printf("decode %5.4f\n", decodeMs);
  }

  /**
   * Measures the average time of a single {@code encodeHyperLogLog} call.
   *
   * <p>The call is executed {@link #LOOPS} times; the first {@link #WARM_LOOPS} timings are
   * discarded and the remaining ones are averaged. Allocation of the output buffer is kept
   * outside the timed region.
   *
   * @param codec codec under test
   * @param hyperLogLog instance to encode
   * @return average encode time in milliseconds
   * @throws IOException if encoding fails
   */
  private static double timeEncode(HyperLogLogCodec codec, HyperLogLog hyperLogLog)
      throws IOException {
    int buckets = hyperLogLog.buckets().length;

    long encodeTime = 0;
    for (int i = 0; i < LOOPS; i++) {
      ByteArrayOutputStream out = new ByteArrayOutputStream(buckets);

      long startTime = System.nanoTime();
      codec.encodeHyperLogLog(hyperLogLog, out);
      long delta = System.nanoTime() - startTime;

      // ">=" so that exactly LOOPS - WARM_LOOPS samples are summed, matching the divisor below.
      if (i >= WARM_LOOPS) {
        encodeTime += delta;
      }
    }
    // nanoseconds -> milliseconds, then average over the measured iterations
    return encodeTime / 1.0e6 / (LOOPS - WARM_LOOPS);
  }

  /**
   * Measures the average time of a single {@code decodeHyperLogLog} call on {@code buf}.
   *
   * <p>The call is executed {@link #LOOPS} times; the first {@link #WARM_LOOPS} timings are
   * discarded and the remaining ones are averaged. Construction of the input streams is
   * included in the timed region.
   *
   * @param codec codec under test
   * @param buf encoded bytes to decode
   * @return average decode time in milliseconds
   * @throws IOException if decoding fails
   */
  private static double timeDecode(HyperLogLogCodec codec, byte[] buf) throws IOException {
    long decodeTime = 0;
    for (int i = 0; i < LOOPS; i++) {
      long startTime = System.nanoTime();
      codec.decodeHyperLogLog(new DataInputStream(new ByteArrayInputStream(buf)));
      long delta = System.nanoTime() - startTime;

      // ">=" so that exactly LOOPS - WARM_LOOPS samples are summed, matching the divisor below.
      if (i >= WARM_LOOPS) {
        decodeTime += delta;
      }
    }
    // nanoseconds -> milliseconds, then average over the measured iterations
    return decodeTime / 1.0e6 / (LOOPS - WARM_LOOPS);
  }
}