/*
* Copyright (C) 2012 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.stats.cardinality;
import com.google.common.io.Closeables;
import org.testng.annotations.Test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import static org.testng.Assert.assertEquals;
/**
 * Tests for {@link HyperLogLogCodec}: verifies that {@link HyperLogLog} and
 * {@link AdaptiveHyperLogLog} sketches survive an encode/decode round trip
 * (same bucket values and same cardinality estimate), and that the codec can
 * still decode previously serialized fixtures checked in under
 * {@code src/test/resources/serialization} (backwards compatibility).
 */
public class TestHyperLogLogCodec {

  @Test
  public void testHyperLogLogRoundtrip() throws IOException {
    testHyperLogLogRoundtrip(1024);
    testHyperLogLogRoundtrip(2048);
    testHyperLogLogRoundtrip(4096);
  }

  /**
   * Builds a HyperLogLog with the given bucket count, encodes it, decodes the
   * bytes back, and checks that buckets and estimate are preserved.
   *
   * @param buckets number of buckets for the sketch under test
   */
  private void testHyperLogLogRoundtrip(int buckets) throws IOException {
    HyperLogLog expected = new HyperLogLog(buckets);
    for (int i = 0; i < 30000; ++i) {
      expected.add(i);
    }

    HyperLogLogCodec codec = new HyperLogLogCodec();

    // encode
    ByteArrayOutputStream out = new ByteArrayOutputStream(buckets);
    codec.encodeHyperLogLog(expected, out);
    byte[] compressed = out.toByteArray();

    // decode
    HyperLogLog actual = codec.decodeHyperLogLog(new ByteArrayInputStream(compressed));

    // verify results
    assertEquals(actual.buckets(), expected.buckets());
    assertEquals(actual.estimate(), expected.estimate());
  }

  @Test
  public void testAdaptiveHyperLogLogRoundtripLowCardinality() throws IOException {
    testAdaptiveHyperLogLogRoundtrip(1024, 10);
    testAdaptiveHyperLogLogRoundtrip(2048, 10);
    testAdaptiveHyperLogLogRoundtrip(4096, 10);
  }

  @Test
  public void testAdaptiveHyperLogLogRoundtripHighCardinality() throws IOException {
    testAdaptiveHyperLogLogRoundtrip(1024, 30000);
    testAdaptiveHyperLogLogRoundtrip(2048, 30000);
    testAdaptiveHyperLogLogRoundtrip(4096, 30000);
  }

  /**
   * Same round-trip check as {@link #testHyperLogLogRoundtrip(int)}, but for
   * the adaptive variant and with a configurable cardinality (the adaptive
   * sketch may use a different internal representation at low cardinality).
   *
   * @param buckets number of buckets for the sketch under test
   * @param cardinality how many distinct values to add before encoding
   */
  private void testAdaptiveHyperLogLogRoundtrip(int buckets, int cardinality) throws IOException {
    AdaptiveHyperLogLog expected = new AdaptiveHyperLogLog(buckets);
    for (int i = 0; i < cardinality; ++i) {
      expected.add(i);
    }

    HyperLogLogCodec codec = new HyperLogLogCodec();

    // encode
    ByteArrayOutputStream out = new ByteArrayOutputStream(buckets);
    codec.encodeAdaptiveHyperLogLog(expected, out);
    byte[] compressed = out.toByteArray();

    // decode
    AdaptiveHyperLogLog actual = codec.decodeAdaptiveHyperLogLog(new ByteArrayInputStream(compressed));

    // verify results
    assertEquals(actual.buckets(), expected.buckets());
    assertEquals(actual.estimate(), expected.estimate());
  }

  @Test
  public void testDeserializationBackwardsCompatibility() throws Exception {
    HyperLogLogCodec codec = new HyperLogLogCodec();
    for (int cardinality = 10; cardinality <= 100000; cardinality *= 10) {
      for (int bucketCount = 1024; bucketCount <= 4096; bucketCount <<= 1) {
        String fileBaseName = String.format(
            "serialization/HyperLogLog-%d-%d",
            bucketCount,
            cardinality
        );

        // read the raw bucket values (one byte per bucket, written by main())
        AdaptiveHyperLogLog expected;
        InputStream in = openResource(fileBaseName + ".raw");
        try {
          int[] buckets = new int[bucketCount];
          for (int i = 0; i < bucketCount; i++) {
            int value = in.read();
            if (value < 0) {
              // fail loudly instead of silently filling buckets with -1
              throw new IOException("truncated test resource: " + fileBaseName + ".raw");
            }
            buckets[i] = value;
          }
          expected = new AdaptiveHyperLogLog(buckets);
        } finally {
          Closeables.close(in, true);
        }

        // decode the serialized data
        AdaptiveHyperLogLog actual;
        in = openResource(fileBaseName + ".ser");
        try {
          actual = codec.decodeAdaptiveHyperLogLog(in);
        } finally {
          Closeables.close(in, true);
        }

        // verify results
        assertEquals(actual.buckets(), expected.buckets());
        assertEquals(actual.estimate(), expected.estimate());
      }
    }
  }

  /**
   * Opens a classpath test resource, failing with a clear message — rather
   * than a later NullPointerException — when the resource is missing.
   */
  private InputStream openResource(String name) {
    InputStream in = getClass().getClassLoader().getResourceAsStream(name);
    if (in == null) {
      throw new IllegalStateException("missing test resource: " + name);
    }
    return in;
  }

  /**
   * Generate new serialized HyperLogLog files for backwards compatibility test.
   */
  public static void main(String[] args) throws Exception {
    File directory = new File("src/test/resources/serialization");
    // mkdirs() returns false when the directory already exists, so only fail
    // when it is also absent afterwards
    if (!directory.mkdirs() && !directory.isDirectory()) {
      throw new IOException("unable to create directory: " + directory);
    }

    HyperLogLogCodec codec = new HyperLogLogCodec();
    for (int cardinality = 10; cardinality <= 100000; cardinality *= 10) {
      for (int bucketCount = 1024; bucketCount <= 4096; bucketCount <<= 1) {
        AdaptiveHyperLogLog hyperLogLog = new AdaptiveHyperLogLog(bucketCount);
        for (int i = 0; i < cardinality; ++i) {
          hyperLogLog.add(i);
        }
        String fileBaseName = String.format("HyperLogLog-%d-%d", bucketCount, cardinality);

        // write the serialized data
        OutputStream out = new FileOutputStream(new File(directory, fileBaseName + ".ser"));
        try {
          codec.encodeAdaptiveHyperLogLog(hyperLogLog, out);
        } finally {
          Closeables.close(out, true);
        }

        // write the raw bucket values, one byte per bucket (bucket values are
        // small, so the low byte written by OutputStream.write(int) suffices)
        out = new FileOutputStream(new File(directory, fileBaseName + ".raw"));
        try {
          for (int bucketValue : hyperLogLog.buckets()) {
            out.write(bucketValue);
          }
        } finally {
          Closeables.close(out, true);
        }
      }
    }
  }
}