/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.join.aggregations;
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.Requests;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.join.ParentJoinPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
import org.elasticsearch.search.aggregations.metrics.tophits.TopHits;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
import org.elasticsearch.test.ESIntegTestCase.Scope;
import org.junit.Before;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.join.aggregations.JoinAggregationBuilders.children;
import static org.elasticsearch.join.query.JoinQueryBuilders.hasChildQuery;
import static org.elasticsearch.search.aggregations.AggregationBuilders.sum;
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
import static org.elasticsearch.search.aggregations.AggregationBuilders.topHits;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance;
@ClusterScope(scope = Scope.SUITE)
public class ChildrenIT extends ESIntegTestCase {
private static final Map<String, Control> categoryToControl = new HashMap<>();
@Override
protected boolean ignoreExternalCluster() {
return true;
}
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Collections.singleton(ParentJoinPlugin.class);
}
@Override
protected Collection<Class<? extends Plugin>> transportClientPlugins() {
return nodePlugins();
}
@Before
public void setupCluster() throws Exception {
categoryToControl.clear();
assertAcked(
prepareCreate("test")
.setSettings("index.mapping.single_type", false)
.addMapping("article", "category", "type=keyword")
.addMapping("comment", "_parent", "type=article", "commenter", "type=keyword")
);
List<IndexRequestBuilder> requests = new ArrayList<>();
String[] uniqueCategories = new String[randomIntBetween(1, 25)];
for (int i = 0; i < uniqueCategories.length; i++) {
uniqueCategories[i] = Integer.toString(i);
}
int catIndex = 0;
int numParentDocs = randomIntBetween(uniqueCategories.length, uniqueCategories.length * 5);
for (int i = 0; i < numParentDocs; i++) {
String id = Integer.toString(i);
// TODO: this array is always of length 1, and testChildrenAggs fails if this is changed
String[] categories = new String[randomIntBetween(1,1)];
for (int j = 0; j < categories.length; j++) {
String category = categories[j] = uniqueCategories[catIndex++ % uniqueCategories.length];
Control control = categoryToControl.get(category);
if (control == null) {
categoryToControl.put(category, control = new Control(category));
}
control.articleIds.add(id);
}
requests.add(client()
.prepareIndex("test", "article", id).setCreate(true).setSource("category", categories, "randomized", true));
}
String[] commenters = new String[randomIntBetween(5, 50)];
for (int i = 0; i < commenters.length; i++) {
commenters[i] = Integer.toString(i);
}
int id = 0;
for (Control control : categoryToControl.values()) {
for (String articleId : control.articleIds) {
int numChildDocsPerParent = randomIntBetween(0, 5);
for (int i = 0; i < numChildDocsPerParent; i++) {
String commenter = commenters[id % commenters.length];
String idValue = Integer.toString(id++);
control.commentIds.add(idValue);
Set<String> ids = control.commenterToCommentId.get(commenter);
if (ids == null) {
control.commenterToCommentId.put(commenter, ids = new HashSet<>());
}
ids.add(idValue);
requests.add(client().prepareIndex("test", "comment", idValue)
.setCreate(true).setParent(articleId).setSource("commenter", commenter));
}
}
}
requests.add(client().prepareIndex("test", "article", "a")
.setSource("category", new String[]{"a"}, "randomized", false));
requests.add(client().prepareIndex("test", "article", "b")
.setSource("category", new String[]{"a", "b"}, "randomized", false));
requests.add(client().prepareIndex("test", "article", "c")
.setSource("category", new String[]{"a", "b", "c"}, "randomized", false));
requests.add(client().prepareIndex("test", "article", "d")
.setSource("category", new String[]{"c"}, "randomized", false));
requests.add(client().prepareIndex("test", "comment", "a")
.setParent("a").setSource("{}", XContentType.JSON));
requests.add(client().prepareIndex("test", "comment", "c")
.setParent("c").setSource("{}", XContentType.JSON));
indexRandom(true, requests);
ensureSearchable("test");
}
public void testChildrenAggs() throws Exception {
SearchResponse searchResponse = client().prepareSearch("test")
.setQuery(matchQuery("randomized", true))
.addAggregation(
terms("category").field("category").size(10000).subAggregation(children("to_comment", "comment")
.subAggregation(
terms("commenters").field("commenter").size(10000).subAggregation(
topHits("top_comments")
))
)
).get();
assertSearchResponse(searchResponse);
Terms categoryTerms = searchResponse.getAggregations().get("category");
assertThat(categoryTerms.getBuckets().size(), equalTo(categoryToControl.size()));
for (Map.Entry<String, Control> entry1 : categoryToControl.entrySet()) {
Terms.Bucket categoryBucket = categoryTerms.getBucketByKey(entry1.getKey());
assertThat(categoryBucket.getKeyAsString(), equalTo(entry1.getKey()));
assertThat(categoryBucket.getDocCount(), equalTo((long) entry1.getValue().articleIds.size()));
Children childrenBucket = categoryBucket.getAggregations().get("to_comment");
assertThat(childrenBucket.getName(), equalTo("to_comment"));
assertThat(childrenBucket.getDocCount(), equalTo((long) entry1.getValue().commentIds.size()));
assertThat((long) ((InternalAggregation)childrenBucket).getProperty("_count"),
equalTo((long) entry1.getValue().commentIds.size()));
Terms commentersTerms = childrenBucket.getAggregations().get("commenters");
assertThat((Terms) ((InternalAggregation)childrenBucket).getProperty("commenters"), sameInstance(commentersTerms));
assertThat(commentersTerms.getBuckets().size(), equalTo(entry1.getValue().commenterToCommentId.size()));
for (Map.Entry<String, Set<String>> entry2 : entry1.getValue().commenterToCommentId.entrySet()) {
Terms.Bucket commentBucket = commentersTerms.getBucketByKey(entry2.getKey());
assertThat(commentBucket.getKeyAsString(), equalTo(entry2.getKey()));
assertThat(commentBucket.getDocCount(), equalTo((long) entry2.getValue().size()));
TopHits topHits = commentBucket.getAggregations().get("top_comments");
for (SearchHit searchHit : topHits.getHits().getHits()) {
assertThat(entry2.getValue().contains(searchHit.getId()), is(true));
}
}
}
}
public void testParentWithMultipleBuckets() throws Exception {
SearchResponse searchResponse = client().prepareSearch("test")
.setQuery(matchQuery("randomized", false))
.addAggregation(
terms("category").field("category").size(10000).subAggregation(
children("to_comment", "comment").subAggregation(topHits("top_comments").sort("_uid", SortOrder.ASC))
)
).get();
assertSearchResponse(searchResponse);
Terms categoryTerms = searchResponse.getAggregations().get("category");
assertThat(categoryTerms.getBuckets().size(), equalTo(3));
for (Terms.Bucket bucket : categoryTerms.getBuckets()) {
logger.info("bucket={}", bucket.getKey());
Children childrenBucket = bucket.getAggregations().get("to_comment");
TopHits topHits = childrenBucket.getAggregations().get("top_comments");
logger.info("total_hits={}", topHits.getHits().getTotalHits());
for (SearchHit searchHit : topHits.getHits()) {
logger.info("hit= {} {} {}", searchHit.getSortValues()[0], searchHit.getType(), searchHit.getId());
}
}
Terms.Bucket categoryBucket = categoryTerms.getBucketByKey("a");
assertThat(categoryBucket.getKeyAsString(), equalTo("a"));
assertThat(categoryBucket.getDocCount(), equalTo(3L));
Children childrenBucket = categoryBucket.getAggregations().get("to_comment");
assertThat(childrenBucket.getName(), equalTo("to_comment"));
assertThat(childrenBucket.getDocCount(), equalTo(2L));
TopHits topHits = childrenBucket.getAggregations().get("top_comments");
assertThat(topHits.getHits().getTotalHits(), equalTo(2L));
assertThat(topHits.getHits().getAt(0).getId(), equalTo("a"));
assertThat(topHits.getHits().getAt(0).getType(), equalTo("comment"));
assertThat(topHits.getHits().getAt(1).getId(), equalTo("c"));
assertThat(topHits.getHits().getAt(1).getType(), equalTo("comment"));
categoryBucket = categoryTerms.getBucketByKey("b");
assertThat(categoryBucket.getKeyAsString(), equalTo("b"));
assertThat(categoryBucket.getDocCount(), equalTo(2L));
childrenBucket = categoryBucket.getAggregations().get("to_comment");
assertThat(childrenBucket.getName(), equalTo("to_comment"));
assertThat(childrenBucket.getDocCount(), equalTo(1L));
topHits = childrenBucket.getAggregations().get("top_comments");
assertThat(topHits.getHits().getTotalHits(), equalTo(1L));
assertThat(topHits.getHits().getAt(0).getId(), equalTo("c"));
assertThat(topHits.getHits().getAt(0).getType(), equalTo("comment"));
categoryBucket = categoryTerms.getBucketByKey("c");
assertThat(categoryBucket.getKeyAsString(), equalTo("c"));
assertThat(categoryBucket.getDocCount(), equalTo(2L));
childrenBucket = categoryBucket.getAggregations().get("to_comment");
assertThat(childrenBucket.getName(), equalTo("to_comment"));
assertThat(childrenBucket.getDocCount(), equalTo(1L));
topHits = childrenBucket.getAggregations().get("top_comments");
assertThat(topHits.getHits().getTotalHits(), equalTo(1L));
assertThat(topHits.getHits().getAt(0).getId(), equalTo("c"));
assertThat(topHits.getHits().getAt(0).getType(), equalTo("comment"));
}
public void testWithDeletes() throws Exception {
String indexName = "xyz";
assertAcked(
prepareCreate(indexName)
.setSettings("index.mapping.single_type", false)
.addMapping("parent")
.addMapping("child", "_parent", "type=parent", "count", "type=long")
);
List<IndexRequestBuilder> requests = new ArrayList<>();
requests.add(client().prepareIndex(indexName, "parent", "1").setSource("{}", XContentType.JSON));
requests.add(client().prepareIndex(indexName, "child", "0").setParent("1").setSource("count", 1));
requests.add(client().prepareIndex(indexName, "child", "1").setParent("1").setSource("count", 1));
requests.add(client().prepareIndex(indexName, "child", "2").setParent("1").setSource("count", 1));
requests.add(client().prepareIndex(indexName, "child", "3").setParent("1").setSource("count", 1));
indexRandom(true, requests);
for (int i = 0; i < 10; i++) {
SearchResponse searchResponse = client().prepareSearch(indexName)
.addAggregation(children("children", "child").subAggregation(sum("counts").field("count")))
.get();
assertNoFailures(searchResponse);
Children children = searchResponse.getAggregations().get("children");
assertThat(children.getDocCount(), equalTo(4L));
Sum count = children.getAggregations().get("counts");
assertThat(count.getValue(), equalTo(4.));
String idToUpdate = Integer.toString(randomInt(3));
/*
* The whole point of this test is to test these things with deleted
* docs in the index so we turn off detect_noop to make sure that
* the updates cause that.
*/
UpdateResponse updateResponse = client().prepareUpdate(indexName, "child", idToUpdate)
.setParent("1")
.setDoc(Requests.INDEX_CONTENT_TYPE, "count", 1)
.setDetectNoop(false)
.get();
assertThat(updateResponse.getVersion(), greaterThan(1L));
refresh();
}
}
public void testNonExistingChildType() throws Exception {
SearchResponse searchResponse = client().prepareSearch("test")
.addAggregation(
children("non-existing", "xyz")
).get();
assertSearchResponse(searchResponse);
Children children = searchResponse.getAggregations().get("non-existing");
assertThat(children.getName(), equalTo("non-existing"));
assertThat(children.getDocCount(), equalTo(0L));
}
public void testPostCollection() throws Exception {
String indexName = "prodcatalog";
String masterType = "masterprod";
String childType = "variantsku";
assertAcked(
prepareCreate(indexName)
.setSettings("index.mapping.single_type", false)
.addMapping(masterType, "brand", "type=text", "name", "type=keyword", "material", "type=text")
.addMapping(childType, "_parent", "type=masterprod", "color", "type=keyword", "size", "type=keyword")
);
List<IndexRequestBuilder> requests = new ArrayList<>();
requests.add(client().prepareIndex(indexName, masterType, "1")
.setSource("brand", "Levis", "name", "Style 501", "material", "Denim"));
requests.add(client().prepareIndex(indexName, childType, "0").setParent("1").setSource("color", "blue", "size", "32"));
requests.add(client().prepareIndex(indexName, childType, "1").setParent("1").setSource("color", "blue", "size", "34"));
requests.add(client().prepareIndex(indexName, childType, "2").setParent("1").setSource("color", "blue", "size", "36"));
requests.add(client().prepareIndex(indexName, childType, "3").setParent("1").setSource("color", "black", "size", "38"));
requests.add(client().prepareIndex(indexName, childType, "4").setParent("1").setSource("color", "black", "size", "40"));
requests.add(client().prepareIndex(indexName, childType, "5").setParent("1").setSource("color", "gray", "size", "36"));
requests.add(client().prepareIndex(indexName, masterType, "2")
.setSource("brand", "Wrangler", "name", "Regular Cut", "material", "Leather"));
requests.add(client().prepareIndex(indexName, childType, "6").setParent("2").setSource("color", "blue", "size", "32"));
requests.add(client().prepareIndex(indexName, childType, "7").setParent("2").setSource("color", "blue", "size", "34"));
requests.add(client().prepareIndex(indexName, childType, "8").setParent("2").setSource("color", "black", "size", "36"));
requests.add(client().prepareIndex(indexName, childType, "9").setParent("2").setSource("color", "black", "size", "38"));
requests.add(client().prepareIndex(indexName, childType, "10").setParent("2").setSource("color", "black", "size", "40"));
requests.add(client().prepareIndex(indexName, childType, "11").setParent("2").setSource("color", "orange", "size", "36"));
requests.add(client().prepareIndex(indexName, childType, "12").setParent("2").setSource("color", "green", "size", "44"));
indexRandom(true, requests);
SearchResponse response = client().prepareSearch(indexName).setTypes(masterType)
.setQuery(hasChildQuery(childType, termQuery("color", "orange"), ScoreMode.None))
.addAggregation(children("my-refinements", childType)
.subAggregation(terms("my-colors").field("color"))
.subAggregation(terms("my-sizes").field("size"))
).get();
assertNoFailures(response);
assertHitCount(response, 1);
Children childrenAgg = response.getAggregations().get("my-refinements");
assertThat(childrenAgg.getDocCount(), equalTo(7L));
Terms termsAgg = childrenAgg.getAggregations().get("my-colors");
assertThat(termsAgg.getBuckets().size(), equalTo(4));
assertThat(termsAgg.getBucketByKey("black").getDocCount(), equalTo(3L));
assertThat(termsAgg.getBucketByKey("blue").getDocCount(), equalTo(2L));
assertThat(termsAgg.getBucketByKey("green").getDocCount(), equalTo(1L));
assertThat(termsAgg.getBucketByKey("orange").getDocCount(), equalTo(1L));
termsAgg = childrenAgg.getAggregations().get("my-sizes");
assertThat(termsAgg.getBuckets().size(), equalTo(6));
assertThat(termsAgg.getBucketByKey("36").getDocCount(), equalTo(2L));
assertThat(termsAgg.getBucketByKey("32").getDocCount(), equalTo(1L));
assertThat(termsAgg.getBucketByKey("34").getDocCount(), equalTo(1L));
assertThat(termsAgg.getBucketByKey("38").getDocCount(), equalTo(1L));
assertThat(termsAgg.getBucketByKey("40").getDocCount(), equalTo(1L));
assertThat(termsAgg.getBucketByKey("44").getDocCount(), equalTo(1L));
}
public void testHierarchicalChildrenAggs() {
String indexName = "geo";
String grandParentType = "continent";
String parentType = "country";
String childType = "city";
assertAcked(
prepareCreate(indexName)
.setSettings(Settings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
)
.setSettings("index.mapping.single_type", false)
.addMapping(grandParentType, "name", "type=keyword")
.addMapping(parentType, "_parent", "type=" + grandParentType)
.addMapping(childType, "_parent", "type=" + parentType)
);
client().prepareIndex(indexName, grandParentType, "1").setSource("name", "europe").get();
client().prepareIndex(indexName, parentType, "2").setParent("1").setSource("name", "belgium").get();
client().prepareIndex(indexName, childType, "3").setParent("2").setRouting("1").setSource("name", "brussels").get();
refresh();
SearchResponse response = client().prepareSearch(indexName)
.setQuery(matchQuery("name", "europe"))
.addAggregation(
children(parentType, parentType).subAggregation(children(childType, childType).subAggregation(
terms("name").field("name")
)
)
)
.get();
assertNoFailures(response);
assertHitCount(response, 1);
Children children = response.getAggregations().get(parentType);
assertThat(children.getName(), equalTo(parentType));
assertThat(children.getDocCount(), equalTo(1L));
children = children.getAggregations().get(childType);
assertThat(children.getName(), equalTo(childType));
assertThat(children.getDocCount(), equalTo(1L));
Terms terms = children.getAggregations().get("name");
assertThat(terms.getBuckets().size(), equalTo(1));
assertThat(terms.getBuckets().get(0).getKey().toString(), equalTo("brussels"));
assertThat(terms.getBuckets().get(0).getDocCount(), equalTo(1L));
}
public void testPostCollectAllLeafReaders() throws Exception {
// The 'towns' and 'parent_names' aggs operate on parent docs and if child docs are in different segments we need
// to ensure those segments which child docs are also evaluated to in the post collect phase.
// Before we only evaluated segments that yielded matches in 'towns' and 'parent_names' aggs, which caused
// us to miss to evaluate child docs in segments we didn't have parent matches for.
assertAcked(
prepareCreate("index")
.setSettings("index.mapping.single_type", false)
.addMapping("parentType", "name", "type=keyword", "town", "type=keyword")
.addMapping("childType", "_parent", "type=parentType", "name", "type=keyword", "age", "type=integer")
);
List<IndexRequestBuilder> requests = new ArrayList<>();
requests.add(client().prepareIndex("index", "parentType", "1").setSource("name", "Bob", "town", "Memphis"));
requests.add(client().prepareIndex("index", "parentType", "2").setSource("name", "Alice", "town", "Chicago"));
requests.add(client().prepareIndex("index", "parentType", "3").setSource("name", "Bill", "town", "Chicago"));
requests.add(client().prepareIndex("index", "childType", "1").setSource("name", "Jill", "age", 5).setParent("1"));
requests.add(client().prepareIndex("index", "childType", "2").setSource("name", "Joey", "age", 3).setParent("1"));
requests.add(client().prepareIndex("index", "childType", "3").setSource("name", "John", "age", 2).setParent("2"));
requests.add(client().prepareIndex("index", "childType", "4").setSource("name", "Betty", "age", 6).setParent("3"));
requests.add(client().prepareIndex("index", "childType", "5").setSource("name", "Dan", "age", 1).setParent("3"));
indexRandom(true, requests);
SearchResponse response = client().prepareSearch("index")
.setSize(0)
.addAggregation(AggregationBuilders.terms("towns").field("town")
.subAggregation(AggregationBuilders.terms("parent_names").field("name")
.subAggregation(children("child_docs", "childType"))
)
)
.get();
Terms towns = response.getAggregations().get("towns");
assertThat(towns.getBuckets().size(), equalTo(2));
assertThat(towns.getBuckets().get(0).getKeyAsString(), equalTo("Chicago"));
assertThat(towns.getBuckets().get(0).getDocCount(), equalTo(2L));
Terms parents = towns.getBuckets().get(0).getAggregations().get("parent_names");
assertThat(parents.getBuckets().size(), equalTo(2));
assertThat(parents.getBuckets().get(0).getKeyAsString(), equalTo("Alice"));
assertThat(parents.getBuckets().get(0).getDocCount(), equalTo(1L));
Children children = parents.getBuckets().get(0).getAggregations().get("child_docs");
assertThat(children.getDocCount(), equalTo(1L));
assertThat(parents.getBuckets().get(1).getKeyAsString(), equalTo("Bill"));
assertThat(parents.getBuckets().get(1).getDocCount(), equalTo(1L));
children = parents.getBuckets().get(1).getAggregations().get("child_docs");
assertThat(children.getDocCount(), equalTo(2L));
assertThat(towns.getBuckets().get(1).getKeyAsString(), equalTo("Memphis"));
assertThat(towns.getBuckets().get(1).getDocCount(), equalTo(1L));
parents = towns.getBuckets().get(1).getAggregations().get("parent_names");
assertThat(parents.getBuckets().size(), equalTo(1));
assertThat(parents.getBuckets().get(0).getKeyAsString(), equalTo("Bob"));
assertThat(parents.getBuckets().get(0).getDocCount(), equalTo(1L));
children = parents.getBuckets().get(0).getAggregations().get("child_docs");
assertThat(children.getDocCount(), equalTo(2L));
}
private static final class Control {
final String category;
final Set<String> articleIds = new HashSet<>();
final Set<String> commentIds = new HashSet<>();
final Map<String, Set<String>> commenterToCommentId = new HashMap<>();
private Control(String category) {
this.category = category;
}
}
}