/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongSet;
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.aggregations.AggregationTestScriptsPlugin;
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.test.ESIntegTestCase;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.search.aggregations.AggregationBuilders.dateHistogram;
import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram;
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAllSuccessful;
@ESIntegTestCase.SuiteScopeTestCase
public class MinDocCountIT extends AbstractTermsTestCase {

    private static final QueryBuilder QUERY = QueryBuilders.termQuery("match", true);

    /** Number of distinct terms indexed; assigned once in {@link #setupSuiteScopeCluster()}. */
    private static int cardinality;

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return Collections.singleton(CustomScriptPlugin.class);
    }

    /** Registers the inline scripts used by the {@link Script#YES} variants of the tests. */
    public static class CustomScriptPlugin extends AggregationTestScriptsPlugin {

        @Override
        @SuppressWarnings("unchecked")
        protected Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
            Map<String, Function<Map<String, Object>, Object>> scripts = new HashMap<>();

            scripts.put("doc['d'].values", vars -> {
                Map<?, ?> doc = (Map) vars.get("doc");
                ScriptDocValues.Doubles value = (ScriptDocValues.Doubles) doc.get("d");
                return value.getValues();
            });

            scripts.put("doc['l'].values", vars -> {
                Map<?, ?> doc = (Map) vars.get("doc");
                ScriptDocValues.Longs value = (ScriptDocValues.Longs) doc.get("l");
                return value.getValues();
            });

            scripts.put("doc['s'].values", vars -> {
                Map<?, ?> doc = (Map) vars.get("doc");
                ScriptDocValues.Strings value = (ScriptDocValues.Strings) doc.get("s");
                return value.getValues();
            });

            return scripts;
        }
    }

    @Override
    public void setupSuiteScopeCluster() throws Exception {
        assertAcked(client().admin().indices().prepareCreate("idx")
                .addMapping("type", "s", "type=keyword").get());

        cardinality = randomIntBetween(8, 30);
        final List<IndexRequestBuilder> indexRequests = new ArrayList<>();
        final Set<String> stringTerms = new HashSet<>();
        final LongSet longTerms = new LongHashSet();
        for (int i = 0; i < cardinality; ++i) {
            // Re-draw until the term is genuinely new so every iteration produces a distinct term.
            String stringTerm;
            do {
                stringTerm = RandomStrings.randomAsciiOfLength(random(), 8);
            } while (!stringTerms.add(stringTerm));
            long longTerm;
            do {
                longTerm = randomInt(cardinality * 2);
            } while (!longTerms.add(longTerm));
            double doubleTerm = longTerm * Math.PI;
            // longTerm <= cardinality * 2 <= 60, so (longTerm % 20) + 1 is always a valid January day.
            String dateTerm = DateTimeFormat.forPattern("yyyy-MM-dd")
                    .print(new DateTime(2014, 1, ((int) longTerm % 20) + 1, 0, 0, DateTimeZone.UTC));
            final int frequency = randomBoolean() ? 1 : randomIntBetween(2, 20);
            for (int j = 0; j < frequency; ++j) {
                indexRequests.add(client().prepareIndex("idx", "type").setSource(jsonBuilder()
                        .startObject()
                        .field("s", stringTerm)
                        .field("l", longTerm)
                        .field("d", doubleTerm)
                        .field("date", dateTerm)
                        .field("match", randomBoolean())
                        .endObject()));
            }
        }
        // Record the number of distinct terms that were actually generated.
        cardinality = stringTerms.size();

        indexRandom(true, indexRequests);
        ensureSearchable();
    }

    /** Whether the terms aggregation reads the field directly or through an inline script. */
    private enum Script {
        NO {
            @Override
            TermsAggregationBuilder apply(TermsAggregationBuilder builder, String field) {
                return builder.field(field);
            }
        },
        YES {
            @Override
            TermsAggregationBuilder apply(TermsAggregationBuilder builder, String field) {
                return builder.script(new org.elasticsearch.script.Script(ScriptType.INLINE,
                        CustomScriptPlugin.NAME, "doc['" + field + "'].values", Collections.emptyMap()));
            }
        };

        abstract TermsAggregationBuilder apply(TermsAggregationBuilder builder, String field);
    }

    /**
     * Checks that {@code terms2} equals the subset of {@code terms1} obtained by dropping
     * buckets below {@code minDocCount}, applying the optional {@code include} regex to
     * bucket keys, and truncating to the first {@code size} surviving buckets.
     */
    private void assertSubset(Terms terms1, Terms terms2, long minDocCount, int size, String include) {
        final Matcher matcher = include == null ? null : Pattern.compile(include).matcher("");
        final Iterator<? extends Terms.Bucket> it1 = terms1.getBuckets().iterator();
        final Iterator<? extends Terms.Bucket> it2 = terms2.getBuckets().iterator();
        int size2 = 0;
        while (it1.hasNext()) {
            final Terms.Bucket bucket1 = it1.next();
            if (bucket1.getDocCount() >= minDocCount && (matcher == null || matcher.reset(bucket1.getKeyAsString()).matches())) {
                // Only the first `size` matching buckets are expected in terms2.
                if (size2++ == size) {
                    break;
                }
                assertTrue("minDocCount: " + minDocCount, it2.hasNext());
                final Terms.Bucket bucket2 = it2.next();
                assertEquals("minDocCount: " + minDocCount, bucket1.getDocCount(), bucket2.getDocCount());
            }
        }
        // terms2 must not contain any bucket beyond the expected subset.
        assertFalse(it2.hasNext());
    }

    /**
     * Checks that {@code histo2} contains exactly the buckets of {@code histo1} whose
     * doc count is at least {@code minDocCount}, in the same order.
     */
    private void assertSubset(Histogram histo1, Histogram histo2, long minDocCount) {
        final Iterator<? extends Histogram.Bucket> it2 = histo2.getBuckets().iterator();
        for (Histogram.Bucket b1 : histo1.getBuckets()) {
            if (b1.getDocCount() >= minDocCount) {
                final Histogram.Bucket b2 = it2.next();
                assertEquals(b1.getKey(), b2.getKey());
                assertEquals(b1.getDocCount(), b2.getDocCount());
            }
        }
    }

    public void testStringTermAsc() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, BucketOrder.key(true));
    }

    public void testStringScriptTermAsc() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, BucketOrder.key(true));
    }

    public void testStringTermDesc() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, BucketOrder.key(false));
    }

    public void testStringScriptTermDesc() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, BucketOrder.key(false));
    }

    public void testStringCountAsc() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, BucketOrder.count(true));
    }

    public void testStringScriptCountAsc() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, BucketOrder.count(true));
    }

    public void testStringCountDesc() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, BucketOrder.count(false));
    }

    public void testStringScriptCountDesc() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, BucketOrder.count(false));
    }

    public void testStringCountAscWithInclude() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, BucketOrder.count(true), ".*a.*");
    }

    public void testStringScriptCountAscWithInclude() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, BucketOrder.count(true), ".*a.*");
    }

    public void testStringCountDescWithInclude() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, BucketOrder.count(false), ".*a.*");
    }

    public void testStringScriptCountDescWithInclude() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, BucketOrder.count(false), ".*a.*");
    }

    public void testLongTermAsc() throws Exception {
        testMinDocCountOnTerms("l", Script.NO, BucketOrder.key(true));
    }

    public void testLongScriptTermAsc() throws Exception {
        testMinDocCountOnTerms("l", Script.YES, BucketOrder.key(true));
    }

    public void testLongTermDesc() throws Exception {
        testMinDocCountOnTerms("l", Script.NO, BucketOrder.key(false));
    }

    public void testLongScriptTermDesc() throws Exception {
        testMinDocCountOnTerms("l", Script.YES, BucketOrder.key(false));
    }

    public void testLongCountAsc() throws Exception {
        testMinDocCountOnTerms("l", Script.NO, BucketOrder.count(true));
    }

    public void testLongScriptCountAsc() throws Exception {
        testMinDocCountOnTerms("l", Script.YES, BucketOrder.count(true));
    }

    public void testLongCountDesc() throws Exception {
        testMinDocCountOnTerms("l", Script.NO, BucketOrder.count(false));
    }

    public void testLongScriptCountDesc() throws Exception {
        testMinDocCountOnTerms("l", Script.YES, BucketOrder.count(false));
    }

    public void testDoubleTermAsc() throws Exception {
        testMinDocCountOnTerms("d", Script.NO, BucketOrder.key(true));
    }

    public void testDoubleScriptTermAsc() throws Exception {
        testMinDocCountOnTerms("d", Script.YES, BucketOrder.key(true));
    }

    public void testDoubleTermDesc() throws Exception {
        testMinDocCountOnTerms("d", Script.NO, BucketOrder.key(false));
    }

    public void testDoubleScriptTermDesc() throws Exception {
        testMinDocCountOnTerms("d", Script.YES, BucketOrder.key(false));
    }

    public void testDoubleCountAsc() throws Exception {
        testMinDocCountOnTerms("d", Script.NO, BucketOrder.count(true));
    }

    public void testDoubleScriptCountAsc() throws Exception {
        testMinDocCountOnTerms("d", Script.YES, BucketOrder.count(true));
    }

    public void testDoubleCountDesc() throws Exception {
        testMinDocCountOnTerms("d", Script.NO, BucketOrder.count(false));
    }

    public void testDoubleScriptCountDesc() throws Exception {
        testMinDocCountOnTerms("d", Script.YES, BucketOrder.count(false));
    }

    private void testMinDocCountOnTerms(String field, Script script, BucketOrder order) throws Exception {
        testMinDocCountOnTerms(field, script, order, null);
    }

    /**
     * Runs a reference terms aggregation with {@code minDocCount=0} and then, for increasing
     * {@code minDocCount} values, asserts that each filtered response is the expected subset
     * of the reference response.
     */
    private void testMinDocCountOnTerms(String field, Script script, BucketOrder order, String include) throws Exception {
        // all terms
        final SearchResponse allTermsResponse = client().prepareSearch("idx").setTypes("type")
                .setSize(0)
                .setQuery(QUERY)
                .addAggregation(script.apply(terms("terms"), field)
                        .collectMode(randomFrom(SubAggCollectionMode.values()))
                        .executionHint(randomExecutionHint())
                        .order(order)
                        .size(cardinality + randomInt(10))
                        .minDocCount(0))
                .get();
        assertAllSuccessful(allTermsResponse);
        final Terms allTerms = allTermsResponse.getAggregations().get("terms");
        assertEquals(cardinality, allTerms.getBuckets().size());

        for (long minDocCount = 0; minDocCount < 20; ++minDocCount) {
            final int size = randomIntBetween(1, cardinality + 2);
            final SearchRequest request = client().prepareSearch("idx").setTypes("type")
                    .setSize(0)
                    .setQuery(QUERY)
                    .addAggregation(script.apply(terms("terms"), field)
                            .collectMode(randomFrom(SubAggCollectionMode.values()))
                            .executionHint(randomExecutionHint())
                            .order(order)
                            .size(size)
                            .includeExclude(include == null ? null : new IncludeExclude(include, null))
                            .shardSize(cardinality + randomInt(10))
                            .minDocCount(minDocCount)).request();
            final SearchResponse response = client().search(request).get();
            assertAllSuccessful(response);
            assertSubset(allTerms, (Terms) response.getAggregations().get("terms"), minDocCount, size, include);
        }
    }

    public void testHistogramCountAsc() throws Exception {
        testMinDocCountOnHistogram(BucketOrder.count(true));
    }

    public void testHistogramCountDesc() throws Exception {
        testMinDocCountOnHistogram(BucketOrder.count(false));
    }

    public void testHistogramKeyAsc() throws Exception {
        testMinDocCountOnHistogram(BucketOrder.key(true));
    }

    public void testHistogramKeyDesc() throws Exception {
        testMinDocCountOnHistogram(BucketOrder.key(false));
    }

    public void testDateHistogramCountAsc() throws Exception {
        testMinDocCountOnDateHistogram(BucketOrder.count(true));
    }

    public void testDateHistogramCountDesc() throws Exception {
        testMinDocCountOnDateHistogram(BucketOrder.count(false));
    }

    public void testDateHistogramKeyAsc() throws Exception {
        testMinDocCountOnDateHistogram(BucketOrder.key(true));
    }

    public void testDateHistogramKeyDesc() throws Exception {
        testMinDocCountOnDateHistogram(BucketOrder.key(false));
    }

    /** Same subset check as the terms variant, but for a numeric histogram on field "d". */
    private void testMinDocCountOnHistogram(BucketOrder order) throws Exception {
        final int interval = randomIntBetween(1, 3);
        final SearchResponse allResponse = client().prepareSearch("idx").setTypes("type")
                .setSize(0)
                .setQuery(QUERY)
                .addAggregation(histogram("histo").field("d").interval(interval).order(order).minDocCount(0))
                .get();

        final Histogram allHisto = allResponse.getAggregations().get("histo");

        for (long minDocCount = 0; minDocCount < 50; ++minDocCount) {
            final SearchResponse response = client().prepareSearch("idx").setTypes("type")
                    .setSize(0)
                    .setQuery(QUERY)
                    .addAggregation(histogram("histo").field("d").interval(interval).order(order).minDocCount(minDocCount))
                    .get();
            assertSubset(allHisto, (Histogram) response.getAggregations().get("histo"), minDocCount);
        }
    }

    /** Same subset check as the terms variant, but for a daily date histogram on field "date". */
    private void testMinDocCountOnDateHistogram(BucketOrder order) throws Exception {
        final SearchResponse allResponse = client().prepareSearch("idx").setTypes("type")
                .setSize(0)
                .setQuery(QUERY)
                .addAggregation(
                        dateHistogram("histo")
                                .field("date")
                                .dateHistogramInterval(DateHistogramInterval.DAY)
                                .order(order)
                                .minDocCount(0))
                .get();

        final Histogram allHisto = allResponse.getAggregations().get("histo");

        for (long minDocCount = 0; minDocCount < 50; ++minDocCount) {
            final SearchResponse response = client().prepareSearch("idx").setTypes("type")
                    .setSize(0)
                    .setQuery(QUERY)
                    .addAggregation(
                            dateHistogram("histo")
                                    .field("date")
                                    .dateHistogramInterval(DateHistogramInterval.DAY)
                                    .order(order)
                                    .minDocCount(minDocCount))
                    .get();
            assertSubset(allHisto, response.getAggregations().get("histo"), minDocCount);
        }
    }
}