/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations;

import com.carrotsearch.hppc.IntHashSet;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.script.MockScriptPlugin;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.range.Range;
import org.elasticsearch.search.aggregations.bucket.range.Range.Bucket;
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
import org.elasticsearch.test.ESIntegTestCase;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.search.aggregations.AggregationBuilders.extendedStats;
import static org.elasticsearch.search.aggregations.AggregationBuilders.filter;
import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram;
import static org.elasticsearch.search.aggregations.AggregationBuilders.max;
import static org.elasticsearch.search.aggregations.AggregationBuilders.min;
import static org.elasticsearch.search.aggregations.AggregationBuilders.percentiles;
import static org.elasticsearch.search.aggregations.AggregationBuilders.range;
import static org.elasticsearch.search.aggregations.AggregationBuilders.stats;
import static org.elasticsearch.search.aggregations.AggregationBuilders.sum;
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAllSuccessful;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.core.IsNull.notNullValue;

/**
 * Additional tests that exercise more complex aggregation trees on larger random datasets, so that
 * code paths such as the growth of dynamic arrays are covered.
*/
public class EquivalenceIT extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Collections.singleton(CustomScriptPlugin.class);
}
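
    /** Registers the mock {@code floor(_value / interval)} script used by {@link #testDuelTermsHistogram()}. */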
public static class CustomScriptPlugin extends MockScriptPlugin {
@Override
protected Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
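            // For mock scripts the name doubles as the source: divide each value by the "interval" parameter and floor it.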
return Collections.singletonMap("floor(_value / interval)", vars -> {
Double value = (Double) vars.get("_value");
Integer interval = (Integer) vars.get("interval");
return Math.floor(value / interval.doubleValue());
});
}
}
    // Make sure that unordered, reversed, disjoint and/or overlapping ranges are supported.
    // Duels the range aggregation against one equivalent filter aggregation per range.
public void testRandomRanges() throws Exception {
final int numDocs = scaledRandomIntBetween(500, 5000);
final double[][] docs = new double[numDocs][];
for (int i = 0; i < numDocs; ++i) {
final int numValues = randomInt(5);
docs[i] = new double[numValues];
for (int j = 0; j < numValues; ++j) {
docs[i][j] = randomDouble() * 100;
}
}
prepareCreate("idx")
.addMapping("type", jsonBuilder()
.startObject()
.startObject("type")
.startObject("properties")
.startObject("values")
.field("type", "double")
.endObject()
.endObject()
.endObject()
.endObject()).execute().actionGet();
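        // Index each document with its random multi-valued "values" array.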
for (int i = 0; i < docs.length; ++i) {
XContentBuilder source = jsonBuilder()
.startObject()
.startArray("values");
for (int j = 0; j < docs[i].length; ++j) {
source = source.value(docs[i][j]);
}
source = source.endArray().endObject();
client().prepareIndex("idx", "type").setSource(source).execute().actionGet();
}
        assertNoFailures(client().admin().indices().prepareRefresh("idx")
                .setIndicesOptions(IndicesOptions.lenientExpandOpen())
                .get());
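        // Build random range bounds: unbounded below, unbounded above, or bounded on both ends (possibly reversed).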
final int numRanges = randomIntBetween(1, 20);
final double[][] ranges = new double[numRanges][];
for (int i = 0; i < ranges.length; ++i) {
switch (randomInt(2)) {
case 0:
ranges[i] = new double[] { Double.NEGATIVE_INFINITY, randomInt(100) };
break;
case 1:
ranges[i] = new double[] { randomInt(100), Double.POSITIVE_INFINITY };
break;
case 2:
ranges[i] = new double[] { randomInt(100), randomInt(100) };
break;
default:
throw new AssertionError();
}
}
RangeAggregationBuilder query = range("range").field("values");
for (int i = 0; i < ranges.length; ++i) {
String key = Integer.toString(i);
if (ranges[i][0] == Double.NEGATIVE_INFINITY) {
query.addUnboundedTo(key, ranges[i][1]);
} else if (ranges[i][1] == Double.POSITIVE_INFINITY) {
query.addUnboundedFrom(key, ranges[i][0]);
} else {
query.addRange(key, ranges[i][0], ranges[i][1]);
}
}
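        // Mirror every range with a filter aggregation over the same bounds so the doc counts can be duelled.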
SearchRequestBuilder reqBuilder = client().prepareSearch("idx").addAggregation(query);
for (int i = 0; i < ranges.length; ++i) {
RangeQueryBuilder filter = QueryBuilders.rangeQuery("values");
if (ranges[i][0] != Double.NEGATIVE_INFINITY) {
filter = filter.from(ranges[i][0]);
}
            if (ranges[i][1] != Double.POSITIVE_INFINITY) {
filter = filter.to(ranges[i][1]);
}
reqBuilder = reqBuilder.addAggregation(filter("filter" + i, filter));
}
SearchResponse resp = reqBuilder.execute().actionGet();
Range range = resp.getAggregations().get("range");
List<? extends Bucket> buckets = range.getBuckets();
HashMap<String, Bucket> bucketMap = new HashMap<>(buckets.size());
for (Bucket bucket : buckets) {
bucketMap.put(bucket.getKeyAsString(), bucket);
}
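        // Compute the expected per-range doc count client-side and check it against both aggregations.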
for (int i = 0; i < ranges.length; ++i) {
long count = 0;
for (double[] values : docs) {
for (double value : values) {
if (value >= ranges[i][0] && value < ranges[i][1]) {
++count;
break;
}
}
}
final Range.Bucket bucket = bucketMap.get(Integer.toString(i));
assertEquals(bucket.getKeyAsString(), Integer.toString(i), bucket.getKeyAsString());
assertEquals(bucket.getKeyAsString(), count, bucket.getDocCount());
final Filter filter = resp.getAggregations().get("filter" + i);
assertThat(filter.getDocCount(), equalTo(count));
}
}
    // Tests long/double/string terms aggregations with enough buckets to require array growth.
public void testDuelTerms() throws Exception {
final int numDocs = scaledRandomIntBetween(1000, 2000);
final int maxNumTerms = randomIntBetween(10, 5000);
final IntHashSet valuesSet = new IntHashSet();
cluster().wipeIndices("idx");
prepareCreate("idx")
.addMapping("type", jsonBuilder()
.startObject()
.startObject("type")
.startObject("properties")
.startObject("num")
.field("type", "double")
.endObject()
.startObject("string_values")
.field("type", "keyword")
.startObject("fields")
.startObject("doc_values")
.field("type", "keyword")
.field("index", false)
.endObject()
.endObject()
.endObject()
.startObject("long_values")
.field("type", "long")
.endObject()
.startObject("double_values")
.field("type", "double")
.endObject()
.endObject()
.endObject()
.endObject()).execute().actionGet();
List<IndexRequestBuilder> indexingRequests = new ArrayList<>();
for (int i = 0; i < numDocs; ++i) {
final int[] values = new int[randomInt(4)];
for (int j = 0; j < values.length; ++j) {
values[j] = randomInt(maxNumTerms - 1) - 1000;
valuesSet.add(values[j]);
}
XContentBuilder source = jsonBuilder()
.startObject()
.field("num", randomDouble())
.startArray("long_values");
for (int j = 0; j < values.length; ++j) {
source = source.value(values[j]);
}
source = source.endArray().startArray("double_values");
for (int j = 0; j < values.length; ++j) {
source = source.value((double) values[j]);
}
source = source.endArray().startArray("string_values");
for (int j = 0; j < values.length; ++j) {
source = source.value(Integer.toString(values[j]));
}
source = source.endArray().endObject();
indexingRequests.add(client().prepareIndex("idx", "type").setSource(source));
}
indexRandom(true, indexingRequests);
assertNoFailures(client().admin().indices().prepareRefresh("idx")
.setIndicesOptions(IndicesOptions.lenientExpandOpen())
.execute().get());
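        // Randomly exercise either global-ordinals execution mode for the string terms aggregations.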
TermsAggregatorFactory.ExecutionMode[] globalOrdinalModes = new TermsAggregatorFactory.ExecutionMode[] {
TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS_HASH, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS
};
SearchResponse resp = client().prepareSearch("idx")
.addAggregation(
terms("long")
.field("long_values")
.size(maxNumTerms)
.collectMode(randomFrom(SubAggCollectionMode.values()))
.subAggregation(min("min").field("num")))
.addAggregation(
terms("double")
.field("double_values")
.size(maxNumTerms)
.collectMode(randomFrom(SubAggCollectionMode.values()))
.subAggregation(max("max").field("num")))
.addAggregation(
terms("string_map")
.field("string_values")
.collectMode(randomFrom(SubAggCollectionMode.values()))
.executionHint(TermsAggregatorFactory.ExecutionMode.MAP.toString())
.size(maxNumTerms)
.subAggregation(stats("stats").field("num")))
.addAggregation(
terms("string_global_ordinals")
.field("string_values")
.collectMode(randomFrom(SubAggCollectionMode.values()))
.executionHint(globalOrdinalModes[randomInt(globalOrdinalModes.length - 1)].toString())
.size(maxNumTerms)
.subAggregation(extendedStats("stats").field("num")))
.addAggregation(
terms("string_global_ordinals_doc_values")
.field("string_values.doc_values")
.collectMode(randomFrom(SubAggCollectionMode.values()))
.executionHint(globalOrdinalModes[randomInt(globalOrdinalModes.length - 1)].toString())
.size(maxNumTerms)
.subAggregation(extendedStats("stats").field("num")))
.execute().actionGet();
assertAllSuccessful(resp);
assertEquals(numDocs, resp.getHits().getTotalHits());
final Terms longTerms = resp.getAggregations().get("long");
final Terms doubleTerms = resp.getAggregations().get("double");
final Terms stringMapTerms = resp.getAggregations().get("string_map");
final Terms stringGlobalOrdinalsTerms = resp.getAggregations().get("string_global_ordinals");
final Terms stringGlobalOrdinalsDVTerms = resp.getAggregations().get("string_global_ordinals_doc_values");
assertEquals(valuesSet.size(), longTerms.getBuckets().size());
assertEquals(valuesSet.size(), doubleTerms.getBuckets().size());
assertEquals(valuesSet.size(), stringMapTerms.getBuckets().size());
assertEquals(valuesSet.size(), stringGlobalOrdinalsTerms.getBuckets().size());
assertEquals(valuesSet.size(), stringGlobalOrdinalsDVTerms.getBuckets().size());
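        // Every long bucket must have a matching bucket, with the same doc count, in each of the other terms aggregations.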
for (Terms.Bucket bucket : longTerms.getBuckets()) {
final Terms.Bucket doubleBucket = doubleTerms.getBucketByKey(Double.toString(Long.parseLong(bucket.getKeyAsString())));
final Terms.Bucket stringMapBucket = stringMapTerms.getBucketByKey(bucket.getKeyAsString());
final Terms.Bucket stringGlobalOrdinalsBucket = stringGlobalOrdinalsTerms.getBucketByKey(bucket.getKeyAsString());
final Terms.Bucket stringGlobalOrdinalsDVBucket = stringGlobalOrdinalsDVTerms.getBucketByKey(bucket.getKeyAsString());
assertNotNull(doubleBucket);
assertNotNull(stringMapBucket);
assertNotNull(stringGlobalOrdinalsBucket);
assertNotNull(stringGlobalOrdinalsDVBucket);
assertEquals(bucket.getDocCount(), doubleBucket.getDocCount());
assertEquals(bucket.getDocCount(), stringMapBucket.getDocCount());
assertEquals(bucket.getDocCount(), stringGlobalOrdinalsBucket.getDocCount());
assertEquals(bucket.getDocCount(), stringGlobalOrdinalsDVBucket.getDocCount());
}
}
    // Duel between a histogram and a terms aggregation whose script buckets values the same way.
public void testDuelTermsHistogram() throws Exception {
prepareCreate("idx")
.addMapping("type", jsonBuilder()
.startObject()
.startObject("type")
.startObject("properties")
.startObject("num")
.field("type", "double")
.endObject()
.endObject()
.endObject()
.endObject()).execute().actionGet();
final int numDocs = scaledRandomIntBetween(500, 5000);
final int maxNumTerms = randomIntBetween(10, 2000);
final int interval = randomIntBetween(1, 100);
final Integer[] values = new Integer[maxNumTerms];
for (int i = 0; i < values.length; ++i) {
values[i] = randomInt(maxNumTerms * 3) - maxNumTerms;
}
for (int i = 0; i < numDocs; ++i) {
XContentBuilder source = jsonBuilder()
.startObject()
.field("num", randomDouble())
.startArray("values");
final int numValues = randomInt(4);
for (int j = 0; j < numValues; ++j) {
source = source.value(randomFrom(values));
}
source = source.endArray().endObject();
client().prepareIndex("idx", "type").setSource(source).execute().actionGet();
}
assertNoFailures(client().admin().indices().prepareRefresh("idx")
.setIndicesOptions(IndicesOptions.lenientExpandOpen())
.execute().get());
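        // The scripted terms aggregation maps each value to floor(value / interval), so its buckets must line up with the histogram's.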
Map<String, Object> params = new HashMap<>();
params.put("interval", interval);
SearchResponse resp = client().prepareSearch("idx")
.addAggregation(
terms("terms")
.field("values")
.collectMode(randomFrom(SubAggCollectionMode.values()))
.script(new Script(ScriptType.INLINE, CustomScriptPlugin.NAME, "floor(_value / interval)", params))
.size(maxNumTerms))
.addAggregation(
histogram("histo")
.field("values")
.interval(interval)
.minDocCount(1))
.execute().actionGet();
assertSearchResponse(resp);
Terms terms = resp.getAggregations().get("terms");
assertThat(terms, notNullValue());
Histogram histo = resp.getAggregations().get("histo");
assertThat(histo, notNullValue());
assertThat(terms.getBuckets().size(), equalTo(histo.getBuckets().size()));
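        // Histogram keys are bucket lower bounds; dividing by the interval recovers the scripted terms key.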
for (Histogram.Bucket bucket : histo.getBuckets()) {
final double key = ((Number) bucket.getKey()).doubleValue() / interval;
final Terms.Bucket termsBucket = terms.getBucketByKey(String.valueOf(key));
assertEquals(bucket.getDocCount(), termsBucket.getDocCount());
}
}
public void testLargeNumbersOfPercentileBuckets() throws Exception {
// test high numbers of percentile buckets to make sure paging and release work correctly
prepareCreate("idx")
.addMapping("type", jsonBuilder()
.startObject()
.startObject("type")
.startObject("properties")
.startObject("double_value")
.field("type", "double")
.endObject()
.endObject()
.endObject()
.endObject()).execute().actionGet();
final int numDocs = scaledRandomIntBetween(2500, 5000);
logger.info("Indexing [{}] docs", numDocs);
List<IndexRequestBuilder> indexingRequests = new ArrayList<>();
for (int i = 0; i < numDocs; ++i) {
indexingRequests.add(client().prepareIndex("idx", "type", Integer.toString(i)).setSource("double_value", randomDouble()));
}
indexRandom(true, indexingRequests);
SearchResponse response = client().prepareSearch("idx")
.addAggregation(
terms("terms")
.field("double_value")
.collectMode(randomFrom(SubAggCollectionMode.values()))
.subAggregation(percentiles("pcts").field("double_value")))
.execute().actionGet();
assertAllSuccessful(response);
assertEquals(numDocs, response.getHits().getTotalHits());
}
    // Regression test for https://github.com/elastic/elasticsearch/issues/6435
public void testReduce() throws Exception {
createIndex("idx");
final int value = randomIntBetween(0, 10);
indexRandom(true, client().prepareIndex("idx", "type").setSource("f", value));
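        // Split on 6 so the single document falls into exactly one of the two half-open ranges.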
SearchResponse response = client().prepareSearch("idx")
.addAggregation(filter("filter", QueryBuilders.matchAllQuery())
.subAggregation(range("range")
.field("f")
.addUnboundedTo(6)
.addUnboundedFrom(6)
.subAggregation(sum("sum").field("f"))))
.execute().actionGet();
assertSearchResponse(response);
Filter filter = response.getAggregations().get("filter");
assertNotNull(filter);
assertEquals(1, filter.getDocCount());
Range range = filter.getAggregations().get("range");
assertThat(range, notNullValue());
assertThat(range.getName(), equalTo("range"));
List<? extends Bucket> buckets = range.getBuckets();
assertThat(buckets.size(), equalTo(2));
Range.Bucket bucket = buckets.get(0);
assertThat(bucket, notNullValue());
assertThat((String) bucket.getKey(), equalTo("*-6.0"));
assertThat(((Number) bucket.getFrom()).doubleValue(), equalTo(Double.NEGATIVE_INFINITY));
assertThat(((Number) bucket.getTo()).doubleValue(), equalTo(6.0));
assertThat(bucket.getDocCount(), equalTo(value < 6 ? 1L : 0L));
Sum sum = bucket.getAggregations().get("sum");
assertEquals(value < 6 ? value : 0, sum.getValue(), 0d);
bucket = buckets.get(1);
assertThat(bucket, notNullValue());
assertThat((String) bucket.getKey(), equalTo("6.0-*"));
assertThat(((Number) bucket.getFrom()).doubleValue(), equalTo(6.0));
assertThat(((Number) bucket.getTo()).doubleValue(), equalTo(Double.POSITIVE_INFINITY));
assertThat(bucket.getDocCount(), equalTo(value >= 6 ? 1L : 0L));
sum = bucket.getAggregations().get("sum");
assertEquals(value >= 6 ? value : 0, sum.getValue(), 0d);
}
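
    /** Asserts that two terms aggregations have the same buckets, with the same keys and doc counts, in the same order. */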
private void assertEquals(Terms t1, Terms t2) {
List<? extends Terms.Bucket> t1Buckets = t1.getBuckets();
        List<? extends Terms.Bucket> t2Buckets = t2.getBuckets();
assertEquals(t1Buckets.size(), t2Buckets.size());
for (Iterator<? extends Terms.Bucket> it1 = t1Buckets.iterator(), it2 = t2Buckets.iterator(); it1.hasNext(); ) {
final Terms.Bucket b1 = it1.next();
final Terms.Bucket b2 = it2.next();
assertEquals(b1.getDocCount(), b2.getDocCount());
assertEquals(b1.getKey(), b2.getKey());
}
}
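
    // Depth-first and breadth-first collection modes must produce identical aggregation trees.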
public void testDuelDepthBreadthFirst() throws Exception {
createIndex("idx");
final int numDocs = randomIntBetween(100, 500);
List<IndexRequestBuilder> reqs = new ArrayList<>();
for (int i = 0; i < numDocs; ++i) {
final int v1 = randomInt(1 << randomInt(7));
final int v2 = randomInt(1 << randomInt(7));
final int v3 = randomInt(1 << randomInt(7));
reqs.add(client().prepareIndex("idx", "type").setSource("f1", v1, "f2", v2, "f3", v3));
}
indexRandom(true, reqs);
final SearchResponse r1 = client().prepareSearch("idx").addAggregation(
terms("f1").field("f1").collectMode(SubAggCollectionMode.DEPTH_FIRST)
.subAggregation(terms("f2").field("f2").collectMode(SubAggCollectionMode.DEPTH_FIRST)
.subAggregation(terms("f3").field("f3").collectMode(SubAggCollectionMode.DEPTH_FIRST)))).get();
assertSearchResponse(r1);
final SearchResponse r2 = client().prepareSearch("idx").addAggregation(
terms("f1").field("f1").collectMode(SubAggCollectionMode.BREADTH_FIRST)
.subAggregation(terms("f2").field("f2").collectMode(SubAggCollectionMode.BREADTH_FIRST)
.subAggregation(terms("f3").field("f3").collectMode(SubAggCollectionMode.BREADTH_FIRST)))).get();
assertSearchResponse(r2);
final Terms t1 = r1.getAggregations().get("f1");
final Terms t2 = r2.getAggregations().get("f1");
assertEquals(t1, t2);
for (Terms.Bucket b1 : t1.getBuckets()) {
final Terms.Bucket b2 = t2.getBucketByKey(b1.getKeyAsString());
final Terms sub1 = b1.getAggregations().get("f2");
final Terms sub2 = b2.getAggregations().get("f2");
assertEquals(sub1, sub2);
for (Terms.Bucket subB1 : sub1.getBuckets()) {
final Terms.Bucket subB2 = sub2.getBucketByKey(subB1.getKeyAsString());
final Terms subSub1 = subB1.getAggregations().get("f3");
final Terms subSub2 = subB2.getAggregations().get("f3");
assertEquals(subSub1, subSub2);
}
}
}
}