/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.search.aggregations.bucket; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter; import org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms; import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory; import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.test.ESIntegTestCase; import java.util.ArrayList; import java.util.List; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; import static org.elasticsearch.search.aggregations.AggregationBuilders.filter; import static org.elasticsearch.search.aggregations.AggregationBuilders.significantTerms; import static org.elasticsearch.search.aggregations.AggregationBuilders.terms; import static org.hamcrest.Matchers.equalTo; public class TermsShardMinDocCountIT extends ESIntegTestCase { private static final String index = "someindex"; private static final String type = "testtype"; public String randomExecutionHint() { return randomBoolean() ? null : randomFrom(SignificantTermsAggregatorFactory.ExecutionMode.values()).toString(); } // see https://github.com/elastic/elasticsearch/issues/5998 public void testShardMinDocCountSignificantTermsTest() throws Exception { String textMappings; if (randomBoolean()) { textMappings = "type=long"; } else { textMappings = "type=text,fielddata=true"; } assertAcked(prepareCreate(index).setSettings(SETTING_NUMBER_OF_SHARDS, 1, SETTING_NUMBER_OF_REPLICAS, 0) .addMapping(type, "text", textMappings)); List<IndexRequestBuilder> indexBuilders = new ArrayList<>(); addTermsDocs("1", 1, 0, indexBuilders);//high score but low doc freq addTermsDocs("2", 1, 0, indexBuilders); addTermsDocs("3", 1, 0, indexBuilders); addTermsDocs("4", 1, 0, indexBuilders); addTermsDocs("5", 3, 1, indexBuilders);//low score but high doc freq addTermsDocs("6", 3, 1, indexBuilders); addTermsDocs("7", 0, 3, indexBuilders);// make sure the terms all get score > 0 except for this one indexRandom(true, false, indexBuilders); // first, check that indeed when not setting the shardMinDocCount parameter 0 terms are returned SearchResponse response = client().prepareSearch(index) .addAggregation( (filter("inclass", QueryBuilders.termQuery("class", true))) .subAggregation(significantTerms("mySignificantTerms").field("text").minDocCount(2).size(2).executionHint(randomExecutionHint())) ) .execute() .actionGet(); assertSearchResponse(response); InternalFilter filteredBucket = response.getAggregations().get("inclass"); SignificantTerms sigterms = filteredBucket.getAggregations().get("mySignificantTerms"); assertThat(sigterms.getBuckets().size(), equalTo(0)); response = client().prepareSearch(index) .addAggregation( (filter("inclass", QueryBuilders.termQuery("class", true))) .subAggregation(significantTerms("mySignificantTerms").field("text").minDocCount(2).shardMinDocCount(2).size(2).executionHint(randomExecutionHint())) ) .execute() .actionGet(); assertSearchResponse(response); filteredBucket = response.getAggregations().get("inclass"); sigterms = filteredBucket.getAggregations().get("mySignificantTerms"); assertThat(sigterms.getBuckets().size(), equalTo(2)); } private void addTermsDocs(String term, int numInClass, int numNotInClass, List<IndexRequestBuilder> builders) { String sourceClass = "{\"text\": \"" + term + "\", \"class\":" + "true" + "}"; String sourceNotClass = "{\"text\": \"" + term + "\", \"class\":" + "false" + "}"; for (int i = 0; i < numInClass; i++) { builders.add(client().prepareIndex(index, type).setSource(sourceClass, XContentType.JSON)); } for (int i = 0; i < numNotInClass; i++) { builders.add(client().prepareIndex(index, type).setSource(sourceNotClass, XContentType.JSON)); } } // see https://github.com/elastic/elasticsearch/issues/5998 public void testShardMinDocCountTermsTest() throws Exception { final String [] termTypes = {"text", "long", "integer", "float", "double"}; String termtype = termTypes[randomInt(termTypes.length - 1)]; String termMappings = "type=" + termtype; if (termtype.equals("text")) { termMappings += ",fielddata=true"; } assertAcked(prepareCreate(index).setSettings(SETTING_NUMBER_OF_SHARDS, 1, SETTING_NUMBER_OF_REPLICAS, 0).addMapping(type, "text", termMappings)); List<IndexRequestBuilder> indexBuilders = new ArrayList<>(); addTermsDocs("1", 1, indexBuilders);//low doc freq but high score addTermsDocs("2", 1, indexBuilders); addTermsDocs("3", 1, indexBuilders); addTermsDocs("4", 1, indexBuilders); addTermsDocs("5", 3, indexBuilders);//low score but high doc freq addTermsDocs("6", 3, indexBuilders); indexRandom(true, false, indexBuilders); // first, check that indeed when not setting the shardMinDocCount parameter 0 terms are returned SearchResponse response = client().prepareSearch(index) .addAggregation( terms("myTerms").field("text").minDocCount(2).size(2).executionHint(randomExecutionHint()).order(BucketOrder.key(true)) ) .execute() .actionGet(); assertSearchResponse(response); Terms sigterms = response.getAggregations().get("myTerms"); assertThat(sigterms.getBuckets().size(), equalTo(0)); response = client().prepareSearch(index) .addAggregation( terms("myTerms").field("text").minDocCount(2).shardMinDocCount(2).size(2).executionHint(randomExecutionHint()).order(BucketOrder.key(true)) ) .execute() .actionGet(); assertSearchResponse(response); sigterms = response.getAggregations().get("myTerms"); assertThat(sigterms.getBuckets().size(), equalTo(2)); } private void addTermsDocs(String term, int numDocs, List<IndexRequestBuilder> builders) { String sourceClass = "{\"text\": \"" + term + "\"}"; for (int i = 0; i < numDocs; i++) { builders.add(client().prepareIndex(index, type).setSource(sourceClass, XContentType.JSON)); } } }