/*
* Copyright (C) 2012 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.collections.specialized;
import com.facebook.util.digest.DigestFunction;
import com.facebook.util.digest.LongMurmur3Hash;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.util.HashSet;
import java.util.Set;
/**
 * TestNG tests for {@link SampledSetImpl}: size bounds, scaled-size estimates,
 * duplicate handling, merging (including down-sampling on merge) and the
 * change-tracking flag.
 */
public class TestSampledSetImpl {
  /** Max size of the shared fixture set; re-assigned before every test. */
  private int maxSetSize;
  /** Shared fixture set (holds {@code Long} keys), rebuilt before every test. */
  private SampledSet<Long> integerSet;

  /**
   * Creates a fresh, empty fixture set before each test method.
   */
  @BeforeMethod(alwaysRun = true)
  public void setUp() throws Exception {
    // needs to be a multiple of 4 (testMerge divides it by 2 and by 4)
    maxSetSize = 8;

    DigestFunction<Long> longMurmur3Hash = new LongMurmur3Hash();

    integerSet = new SampledSetImpl<Long>(
      maxSetSize, longMurmur3Hash, new IntegerHashSetFactory(maxSetSize)
    );
  }

  /**
   * Adds far more elements than the set can hold and checks that the retained
   * size stays within the max and that the scaled estimate is within 2% of the
   * true element count.
   */
  @Test(groups = "fast")
  public void testSanity() throws Exception {
    int largeMaxSetSize = 4000;
    IntegerHashSetFactory intHashSetFactory = new IntegerHashSetFactory(largeMaxSetSize);
    SampledSet<Long> largeSet =
      new SampledSetImpl<Long>(largeMaxSetSize, new LongMurmur3Hash(), intHashSetFactory);
    int numElements = 2000 * largeMaxSetSize;

    for (int i = 0; i < numElements; i++) {
      largeSet.add((long) i);
    }

    // validate the set under test; the shared fixture set is untouched here,
    // so checking it would always pass trivially
    assertValidMaxSize(largeSet, largeMaxSetSize);

    float error =
      Math.abs(largeSet.getScaledSize() - numElements) / (float) numElements;
    String message = String.format(
      "actual: %d estimate: %d error: %f",
      numElements,
      largeSet.getScaledSize(),
      error
    );

    // this test case is deterministic and we expect less than 2-3%
    Assert.assertTrue(error < 0.02, message);
  }

  /**
   * Fills the fixture set with accepted keys, re-adds the same keys, and checks
   * that the entries are unchanged relative to a snapshot taken before the
   * re-add.
   */
  @Test(groups = "fast")
  public void testAddDuplicateKeys() throws Exception {
    // fill the set: collect keys for which add() returns true
    Set<Integer> elements = new HashSet<Integer>(maxSetSize);

    for (int i = 0; i < 100000; i += 2) { // stop once maxSetSize keys were accepted
      if (integerSet.add((long) i)) {
        elements.add(i);

        if (elements.size() == maxSetSize) {
          break;
        }
      }
    }

    Assert.assertEquals(integerSet.size(), maxSetSize);
    Assert.assertEquals(integerSet.getScaledSize(), maxSetSize);

    SampledSet<Long> setCopy = integerSet.makeSnapshot();

    // add those elements again, down sampling should not happen
    for (Integer i : elements) {
      integerSet.add((long) i);
    }

    Assert.assertEquals(setCopy.getEntries(), integerSet.getEntries());
    // NOTE(review): re-asserting size() and getScaledSize() == maxSetSize after
    // the re-add was disabled in the original test; reason unknown - confirm
    // before re-enabling.
  }

  /**
   * Asserts that the shared fixture set has not grown beyond {@code maxSetSize}.
   */
  private void assertValidMaxSize() {
    assertValidMaxSize(integerSet, maxSetSize);
  }

  /**
   * Asserts that {@code set} holds at most {@code maxSize} entries.
   *
   * @param set     the set whose current size is checked
   * @param maxSize the largest size the set is allowed to report
   */
  private void assertValidMaxSize(SampledSet<Long> set, int maxSize) {
    Assert.assertTrue(
      set.getSize() <= maxSize,
      String.format(
        "max size %d exceeded at %d", maxSize, set.getSize()
      )
    );
  }

  /**
   * Adding exactly {@code maxSetSize} distinct keys must yield a scaled size
   * equal to the number of keys added.
   */
  @Test(groups = "fast")
  public void testSetSizeBelowMax() throws Exception {
    int numElements = maxSetSize;

    for (int i = 0; i < numElements; i++) {
      integerSet.add((long) i);
    }

    Assert.assertEquals(integerSet.getScaledSize(), numElements);
  }

  /**
   * The size bound must hold after every single add, not just at the end.
   */
  @Test(groups = "fast")
  public void testMaxNeverExceeded() throws Exception {
    int numElements = 10 * maxSetSize;

    for (int i = 0; i < numElements; i++) {
      integerSet.add((long) i);
      assertValidMaxSize();
    }
  }

  /**
   * Merges two sets with different max sizes in both directions and checks the
   * max size reported by each merge result.
   */
  @Test(groups = "fast")
  public void testMerge() throws Exception {
    IntegerHashSetFactory longHashSetFactory = new IntegerHashSetFactory(maxSetSize);
    SampledSet<Long> otherSet =
      new SampledSetImpl<Long>(maxSetSize / 4, new LongMurmur3Hash(), longHashSetFactory);
    int firstSetSize = maxSetSize / 2;

    // populate the first set to half of its max size
    for (int i = 0; i < firstSetSize; i++) {
      integerSet.add((long) i);
    }

    Assert.assertEquals(integerSet.getScaledSize(), firstSetSize);

    int secondSetSize = maxSetSize / 4;

    // populate the second set to its max size
    for (int i = 0; i < secondSetSize; i++) {
      otherSet.add((long) i);
    }

    Assert.assertEquals(otherSet.getScaledSize(), secondSetSize);

    // merge into the first
    SampledSet<Long> mergeIntoFirst = integerSet.merge(otherSet);
    // merge into the second
    SampledSet<Long> mergeIntoSecond = otherSet.merge(integerSet);

    // merging into the first set: result reports the larger of the two max sizes
    int maxofMaxSetSize =
      Math.max(integerSet.getMaxSetSize(), otherSet.getMaxSetSize());

    Assert.assertEquals(
      mergeIntoFirst.getMaxSetSize(),
      maxofMaxSetSize
    );
    Assert.assertTrue(mergeIntoFirst.getSize() < maxofMaxSetSize);
    // merging into the second set: result is expected to report
    // maxSetSize / 4, i.e. the max size the second set was constructed with
    Assert.assertEquals(mergeIntoSecond.getMaxSetSize(), secondSetSize);
  }

  /**
   * Merging with an empty set must not change the scaled size.
   */
  @Test(groups = "fast")
  public void testMergeWithEmpty() throws Exception {
    IntegerHashSetFactory longHashSetFactory = new IntegerHashSetFactory(4);
    DigestFunction<Long> digestFunction = new LongMurmur3Hash();
    SampledSet<Long> set1 =
      new SampledSetImpl<Long>(4, digestFunction, longHashSetFactory);
    SampledSet<Long> set2 =
      new SampledSetImpl<Long>(4, digestFunction, longHashSetFactory);

    // five adds against a max size of 4; set2 stays empty
    set1.add(1L);
    set1.add(2L);
    set1.add(3L);
    set1.add(4L);
    set1.add(5L);

    SampledSet<Long> merged = set1.merge(set2);

    Assert.assertEquals(merged.getScaledSize(), set1.getScaledSize());
  }

  /**
   * Merges a small set with one that received more elements than its max size
   * and checks that the scaled size of the result depends on merge direction.
   */
  @Test(groups = "fast")
  public void testMergeWithDownSample() throws Exception {
    IntegerHashSetFactory intHashSetFactory = new IntegerHashSetFactory(4);
    SampledSet<Long> set1 =
      new SampledSetImpl<Long>(2, LongMurmur3Hash.getInstance(), intHashSetFactory);
    SampledSet<Long> set2 =
      new SampledSetImpl<Long>(3, LongMurmur3Hash.getInstance(), intHashSetFactory);

    // set 1 receives a single element against a max size of 2
    set1.add(1L);
    // set 2 receives four elements against a max size of 3
    set2.add(2L);
    set2.add(4L);
    set2.add(8L);
    set2.add(16L);

    // scaled sizes are 1 and 6
    Assert.assertEquals(set1.getScaledSize(), 1);
    Assert.assertEquals(set2.getScaledSize(), 6);

    // now merge in both directions
    SampledSet<Long> merge1with2 = set1.merge(set2);
    SampledSet<Long> merge2with1 = set2.merge(set1);

    // merge is NOT symmetric as the higher sample rate must be used
    Assert.assertEquals(merge1with2.getScaledSize(), 4);
    Assert.assertEquals(merge2with1.getScaledSize(), 6);
  }

  /**
   * A freshly built set, and a set freshly produced by merge(), must both
   * report that they have not changed.
   */
  @Test(groups = "fast")
  public void testHasChanged() throws Exception {
    // initial set should not indicate it has changed
    Assert.assertFalse(integerSet.hasChanged());
    // newly constructed set from merge() should also return false
    Assert.assertFalse(integerSet.merge(integerSet).hasChanged());
  }

  /**
   * Builds a full set through snapshots and in-place merges, merges in one more
   * element, and checks the resulting size and scaled size.
   */
  @Test(groups = "fast")
  public void testProposedSize() throws Exception {
    SampledSet<Long> set1 =
      new SampledSetImpl<Long>(8, new LongMurmur3Hash(), new IntegerHashSetFactory());
    SampledSet<Long> set2 =
      new SampledSetImpl<Long>(8, new LongMurmur3Hash(), new IntegerHashSetFactory());
    SampledSet<Long> set3 =
      new SampledSetImpl<Long>(8, new LongMurmur3Hash(), new IntegerHashSetFactory());

    // set 1 will have 4 elements and sample rate of 1
    set1.add(0L);
    set1.add(1L);
    set1.add(2L);
    set1.add(3L);
    // set 2 will have 4 elements and sample rate of 1
    set2.add(4L);
    set2.add(5L);
    set2.add(6L);
    set2.add(7L);
    // set 3 will have 1 element and sample rate of 1
    set3.add(9L);

    // scaled sizes are 4, 4 and 1
    Assert.assertEquals(set1.getScaledSize(), 4);
    Assert.assertEquals(set2.getScaledSize(), 4);
    Assert.assertEquals(set3.getScaledSize(), 1);

    // now merge:
    // note that making snapshot should not change "proposedSize"
    SampledSet<Long> merged = set1.makeSnapshot();
    merged.mergeInPlaceWith(set2.makeTransientSnapshot());
    merged = merged.makeTransientSnapshot();

    // at this point, merged is a full set. We will add one more element to it.
    merged.mergeInPlaceWith(set3);

    // now merged should be down sampled
    Assert.assertEquals(merged.getSize(), 6);
    Assert.assertEquals(merged.getScaledSize(), 12);
  }
}