/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.range;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.bucket.range.BinaryRangeAggregator.SortedBinaryRangeLeafCollector;
import org.elasticsearch.search.aggregations.bucket.range.BinaryRangeAggregator.SortedSetRangeLeafCollector;
import org.elasticsearch.test.ESTestCase;
import com.carrotsearch.hppc.LongHashSet;
public class BinaryRangeAggregatorTests extends ESTestCase {
private static class FakeSortedSetDocValues extends AbstractSortedSetDocValues {
private final BytesRef[] terms;
long[] ords;
private int i;
FakeSortedSetDocValues(BytesRef[] terms) {
this.terms = terms;
}
@Override
public boolean advanceExact(int docID) {
i = 0;
return true;
}
@Override
public long nextOrd() {
if (i == ords.length) {
return NO_MORE_ORDS;
}
return ords[i++];
}
@Override
public BytesRef lookupOrd(long ord) {
return terms[(int) ord];
}
@Override
public long getValueCount() {
return terms.length;
}
}
private void doTestSortedSetRangeLeafCollector(int maxNumValuesPerDoc) throws Exception {
final Set<BytesRef> termSet = new HashSet<>();
final int numTerms = TestUtil.nextInt(random(), maxNumValuesPerDoc, 100);
while (termSet.size() < numTerms) {
termSet.add(new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
}
final BytesRef[] terms = termSet.toArray(new BytesRef[0]);
Arrays.sort(terms);
final int numRanges = randomIntBetween(1, 10);
BinaryRangeAggregator.Range[] ranges = new BinaryRangeAggregator.Range[numRanges];
for (int i = 0; i < numRanges; ++i) {
ranges[i] = new BinaryRangeAggregator.Range(Integer.toString(i),
randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))),
randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
}
Arrays.sort(ranges, BinaryRangeAggregator.RANGE_COMPARATOR);
FakeSortedSetDocValues values = new FakeSortedSetDocValues(terms);
final int[] counts = new int[ranges.length];
SortedSetRangeLeafCollector collector = new SortedSetRangeLeafCollector(values, ranges, null) {
@Override
protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
counts[(int) bucket]++;
}
};
final int[] expectedCounts = new int[ranges.length];
final int maxDoc = randomIntBetween(5, 10);
for (int doc = 0; doc < maxDoc; ++doc) {
LongHashSet ordinalSet = new LongHashSet();
final int numValues = randomInt(maxNumValuesPerDoc);
while (ordinalSet.size() < numValues) {
ordinalSet.add(random().nextInt(terms.length));
}
final long[] ords = ordinalSet.toArray();
Arrays.sort(ords);
values.ords = ords;
// simulate aggregation
collector.collect(doc);
// now do it the naive way
for (int i = 0; i < ranges.length; ++i) {
for (long ord : ords) {
BytesRef term = terms[(int) ord];
if ((ranges[i].from == null || ranges[i].from.compareTo(term) <= 0)
&& (ranges[i].to == null || ranges[i].to.compareTo(term) > 0)) {
expectedCounts[i]++;
break;
}
}
}
}
assertArrayEquals(expectedCounts, counts);
}
public void testSortedSetRangeLeafCollectorSingleValued() throws Exception {
final int iters = randomInt(10);
for (int i = 0; i < iters; ++i) {
doTestSortedSetRangeLeafCollector(1);
}
}
public void testSortedSetRangeLeafCollectorMultiValued() throws Exception {
final int iters = randomInt(10);
for (int i = 0; i < iters; ++i) {
doTestSortedSetRangeLeafCollector(5);
}
}
private static class FakeSortedBinaryDocValues extends SortedBinaryDocValues {
private final BytesRef[] terms;
int i;
long[] ords;
FakeSortedBinaryDocValues(BytesRef[] terms) {
this.terms = terms;
}
@Override
public boolean advanceExact(int docID) {
i = 0;
return true;
}
@Override
public int docValueCount() {
return ords.length;
}
@Override
public BytesRef nextValue() {
return terms[(int) ords[i++]];
}
}
private void doTestSortedBinaryRangeLeafCollector(int maxNumValuesPerDoc) throws Exception {
final Set<BytesRef> termSet = new HashSet<>();
final int numTerms = TestUtil.nextInt(random(), maxNumValuesPerDoc, 100);
while (termSet.size() < numTerms) {
termSet.add(new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
}
final BytesRef[] terms = termSet.toArray(new BytesRef[0]);
Arrays.sort(terms);
final int numRanges = randomIntBetween(1, 10);
BinaryRangeAggregator.Range[] ranges = new BinaryRangeAggregator.Range[numRanges];
for (int i = 0; i < numRanges; ++i) {
ranges[i] = new BinaryRangeAggregator.Range(Integer.toString(i),
randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))),
randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
}
Arrays.sort(ranges, BinaryRangeAggregator.RANGE_COMPARATOR);
FakeSortedBinaryDocValues values = new FakeSortedBinaryDocValues(terms);
final int[] counts = new int[ranges.length];
SortedBinaryRangeLeafCollector collector = new SortedBinaryRangeLeafCollector(values, ranges, null) {
@Override
protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
counts[(int) bucket]++;
}
};
final int[] expectedCounts = new int[ranges.length];
final int maxDoc = randomIntBetween(5, 10);
for (int doc = 0; doc < maxDoc; ++doc) {
LongHashSet ordinalSet = new LongHashSet();
final int numValues = randomInt(maxNumValuesPerDoc);
while (ordinalSet.size() < numValues) {
ordinalSet.add(random().nextInt(terms.length));
}
final long[] ords = ordinalSet.toArray();
Arrays.sort(ords);
values.ords = ords;
// simulate aggregation
collector.collect(doc);
// now do it the naive way
for (int i = 0; i < ranges.length; ++i) {
for (long ord : ords) {
BytesRef term = terms[(int) ord];
if ((ranges[i].from == null || ranges[i].from.compareTo(term) <= 0)
&& (ranges[i].to == null || ranges[i].to.compareTo(term) > 0)) {
expectedCounts[i]++;
break;
}
}
}
}
assertArrayEquals(expectedCounts, counts);
}
public void testSortedBinaryRangeLeafCollectorSingleValued() throws Exception {
final int iters = randomInt(10);
for (int i = 0; i < iters; ++i) {
doTestSortedBinaryRangeLeafCollector(1);
}
}
public void testSortedBinaryRangeLeafCollectorMultiValued() throws Exception {
final int iters = randomInt(10);
for (int i = 0; i < iters; ++i) {
doTestSortedBinaryRangeLeafCollector(5);
}
}
}