/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.lucene.grouping; import org.apache.lucene.document.Document; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.CompositeReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.Collector; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSortField; import org.apache.lucene.search.SortedSetSortField; import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.Weight; import org.apache.lucene.search.grouping.CollapseTopFieldDocs; import org.apache.lucene.search.grouping.CollapsingTopDocsCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.TestUtil; import org.elasticsearch.test.ESTestCase; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; public class CollapsingTopDocsCollectorTests extends ESTestCase { private static class SegmentSearcher extends IndexSearcher { private final List<LeafReaderContext> ctx; SegmentSearcher(LeafReaderContext ctx, IndexReaderContext parent) { super(parent); this.ctx = Collections.singletonList(ctx); } public void search(Weight weight, Collector collector) throws IOException { search(ctx, weight, collector); } @Override public String toString() { return "ShardSearcher(" + ctx.get(0) + ")"; } } interface CollapsingDocValuesProducer<T extends Comparable> { T randomGroup(int maxGroup); void add(Document doc, T value, boolean multivalued); SortField sortField(boolean multivalued); } <T extends Comparable> void assertSearchCollapse(CollapsingDocValuesProducer<T> dvProducers, boolean numeric) throws IOException { assertSearchCollapse(dvProducers, numeric, true); assertSearchCollapse(dvProducers, numeric, false); } private <T extends Comparable> void assertSearchCollapse(CollapsingDocValuesProducer<T> dvProducers, boolean numeric, boolean multivalued) throws IOException { final int numDocs = randomIntBetween(1000, 2000); int maxGroup = randomIntBetween(2, 500); final Directory dir = newDirectory(); final RandomIndexWriter w = new RandomIndexWriter(random(), dir); Set<T> values = new HashSet<>(); int totalHits = 0; for (int i = 0; i < numDocs; i++) { final T value = dvProducers.randomGroup(maxGroup); values.add(value); Document doc = new Document(); dvProducers.add(doc, value, multivalued); doc.add(new NumericDocValuesField("sort1", randomIntBetween(0, 10))); doc.add(new NumericDocValuesField("sort2", randomLong())); w.addDocument(doc); totalHits++; } List<T> valueList = new ArrayList<>(values); Collections.sort(valueList); final IndexReader reader = w.getReader(); final IndexSearcher searcher = newSearcher(reader); final SortField collapseField = dvProducers.sortField(multivalued); final SortField sort1 = new SortField("sort1", SortField.Type.INT); final SortField sort2 = new SortField("sort2", SortField.Type.LONG); Sort sort = new Sort(sort1, sort2, collapseField); int expectedNumGroups = values.size(); final CollapsingTopDocsCollector collapsingCollector; if (numeric) { collapsingCollector = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false); } else { collapsingCollector = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false); } TopFieldCollector topFieldCollector = TopFieldCollector.create(sort, totalHits, true, false, false); searcher.search(new MatchAllDocsQuery(), collapsingCollector); searcher.search(new MatchAllDocsQuery(), topFieldCollector); CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs(); TopFieldDocs topDocs = topFieldCollector.topDocs(); assertEquals(collapseField.getField(), collapseTopFieldDocs.field); assertEquals(expectedNumGroups, collapseTopFieldDocs.scoreDocs.length); assertEquals(totalHits, collapseTopFieldDocs.totalHits); assertEquals(totalHits, topDocs.scoreDocs.length); assertEquals(totalHits, topDocs.totalHits); Set<Object> seen = new HashSet<>(); // collapse field is the last sort int collapseIndex = sort.getSort().length - 1; int topDocsIndex = 0; for (int i = 0; i < expectedNumGroups; i++) { FieldDoc fieldDoc = null; for (; topDocsIndex < totalHits; topDocsIndex++) { fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex]; if (seen.contains(fieldDoc.fields[collapseIndex]) == false) { break; } } FieldDoc collapseFieldDoc = (FieldDoc) collapseTopFieldDocs.scoreDocs[i]; assertNotNull(fieldDoc); assertEquals(collapseFieldDoc.doc, fieldDoc.doc); assertArrayEquals(collapseFieldDoc.fields, fieldDoc.fields); seen.add(fieldDoc.fields[fieldDoc.fields.length - 1]); } for (; topDocsIndex < totalHits; topDocsIndex++) { FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex]; assertTrue(seen.contains(fieldDoc.fields[collapseIndex])); } // check merge final IndexReaderContext ctx = searcher.getTopReaderContext(); final SegmentSearcher[] subSearchers; final int[] docStarts; if (ctx instanceof LeafReaderContext) { subSearchers = new SegmentSearcher[1]; docStarts = new int[1]; subSearchers[0] = new SegmentSearcher((LeafReaderContext) ctx, ctx); docStarts[0] = 0; } else { final CompositeReaderContext compCTX = (CompositeReaderContext) ctx; final int size = compCTX.leaves().size(); subSearchers = new SegmentSearcher[size]; docStarts = new int[size]; int docBase = 0; for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) { final LeafReaderContext leave = compCTX.leaves().get(searcherIDX); subSearchers[searcherIDX] = new SegmentSearcher(leave, compCTX); docStarts[searcherIDX] = docBase; docBase += leave.reader().maxDoc(); } } final CollapseTopFieldDocs[] shardHits = new CollapseTopFieldDocs[subSearchers.length]; final Weight weight = searcher.createNormalizedWeight(new MatchAllDocsQuery(), false); for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) { final SegmentSearcher subSearcher = subSearchers[shardIDX]; final CollapsingTopDocsCollector c; if (numeric) { c = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false); } else { c = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false); } subSearcher.search(weight, c); shardHits[shardIDX] = c.getTopDocs(); } CollapseTopFieldDocs mergedFieldDocs = CollapseTopFieldDocs.merge(sort, 0, expectedNumGroups, shardHits, true); assertTopDocsEquals(mergedFieldDocs, collapseTopFieldDocs); w.close(); reader.close(); dir.close(); } private static void assertTopDocsEquals(CollapseTopFieldDocs topDocs1, CollapseTopFieldDocs topDocs2) { TestUtil.assertEquals(topDocs1, topDocs2); assertArrayEquals(topDocs1.collapseValues, topDocs2.collapseValues); } public void testCollapseLong() throws Exception { CollapsingDocValuesProducer producer = new CollapsingDocValuesProducer<Long>() { @Override public Long randomGroup(int maxGroup) { return randomNonNegativeLong() % maxGroup; } @Override public void add(Document doc, Long value, boolean multivalued) { if (multivalued) { doc.add(new SortedNumericDocValuesField("field", value)); } else { doc.add(new NumericDocValuesField("field", value)); } } @Override public SortField sortField(boolean multivalued) { if (multivalued) { return new SortedNumericSortField("field", SortField.Type.LONG); } else { return new SortField("field", SortField.Type.LONG); } } }; assertSearchCollapse(producer, true); } public void testCollapseInt() throws Exception { CollapsingDocValuesProducer producer = new CollapsingDocValuesProducer<Integer>() { @Override public Integer randomGroup(int maxGroup) { return randomIntBetween(0, maxGroup - 1); } @Override public void add(Document doc, Integer value, boolean multivalued) { if (multivalued) { doc.add(new SortedNumericDocValuesField("field", value)); } else { doc.add(new NumericDocValuesField("field", value)); } } @Override public SortField sortField(boolean multivalued) { if (multivalued) { return new SortedNumericSortField("field", SortField.Type.INT); } else { return new SortField("field", SortField.Type.INT); } } }; assertSearchCollapse(producer, true); } public void testCollapseFloat() throws Exception { CollapsingDocValuesProducer producer = new CollapsingDocValuesProducer<Float>() { @Override public Float randomGroup(int maxGroup) { return new Float(randomIntBetween(0, maxGroup - 1)); } @Override public void add(Document doc, Float value, boolean multivalued) { if (multivalued) { doc.add(new SortedNumericDocValuesField("field", NumericUtils.floatToSortableInt(value))); } else { doc.add(new NumericDocValuesField("field", Float.floatToIntBits(value))); } } @Override public SortField sortField(boolean multivalued) { if (multivalued) { return new SortedNumericSortField("field", SortField.Type.FLOAT); } else { return new SortField("field", SortField.Type.FLOAT); } } }; assertSearchCollapse(producer, true); } public void testCollapseDouble() throws Exception { CollapsingDocValuesProducer producer = new CollapsingDocValuesProducer<Double>() { @Override public Double randomGroup(int maxGroup) { return new Double(randomIntBetween(0, maxGroup - 1)); } @Override public void add(Document doc, Double value, boolean multivalued) { if (multivalued) { doc.add(new SortedNumericDocValuesField("field", NumericUtils.doubleToSortableLong(value))); } else { doc.add(new NumericDocValuesField("field", Double.doubleToLongBits(value))); } } @Override public SortField sortField(boolean multivalued) { if (multivalued) { return new SortedNumericSortField("field", SortField.Type.DOUBLE); } else { return new SortField("field", SortField.Type.DOUBLE); } } }; assertSearchCollapse(producer, true); } public void testCollapseString() throws Exception { CollapsingDocValuesProducer producer = new CollapsingDocValuesProducer<BytesRef>() { @Override public BytesRef randomGroup(int maxGroup) { return new BytesRef(Integer.toString(randomIntBetween(0, maxGroup - 1))); } @Override public void add(Document doc, BytesRef value, boolean multivalued) { if (multivalued) { doc.add(new SortedSetDocValuesField("field", value)); } else { doc.add(new SortedDocValuesField("field", value)); } } @Override public SortField sortField(boolean multivalued) { if (multivalued) { return new SortedSetSortField("field", false); } else { return new SortField("field", SortField.Type.STRING_VAL); } } }; assertSearchCollapse(producer, false); } public void testEmptyNumericSegment() throws Exception { final Directory dir = newDirectory(); final RandomIndexWriter w = new RandomIndexWriter(random(), dir); Document doc = new Document(); doc.add(new NumericDocValuesField("group", 0)); w.addDocument(doc); doc.clear(); doc.add(new NumericDocValuesField("group", 1)); w.addDocument(doc); w.commit(); doc.clear(); doc.add(new NumericDocValuesField("group", 10)); w.addDocument(doc); w.commit(); doc.clear(); doc.add(new NumericDocValuesField("category", 0)); w.addDocument(doc); w.commit(); final IndexReader reader = w.getReader(); final IndexSearcher searcher = newSearcher(reader); SortField sortField = new SortField("group", SortField.Type.LONG); sortField.setMissingValue(Long.MAX_VALUE); Sort sort = new Sort(sortField); final CollapsingTopDocsCollector collapsingCollector = CollapsingTopDocsCollector.createNumeric("group", sort, 10, false); searcher.search(new MatchAllDocsQuery(), collapsingCollector); CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs(); assertEquals(4, collapseTopFieldDocs.scoreDocs.length); assertEquals(4, collapseTopFieldDocs.collapseValues.length); assertEquals(0L, collapseTopFieldDocs.collapseValues[0]); assertEquals(1L, collapseTopFieldDocs.collapseValues[1]); assertEquals(10L, collapseTopFieldDocs.collapseValues[2]); assertNull(collapseTopFieldDocs.collapseValues[3]); w.close(); reader.close(); dir.close(); } public void testEmptySortedSegment() throws Exception { final Directory dir = newDirectory(); final RandomIndexWriter w = new RandomIndexWriter(random(), dir); Document doc = new Document(); doc.add(new SortedDocValuesField("group", new BytesRef("0"))); w.addDocument(doc); doc.clear(); doc.add(new SortedDocValuesField("group", new BytesRef("1"))); w.addDocument(doc); w.commit(); doc.clear(); doc.add(new SortedDocValuesField("group", new BytesRef("10"))); w.addDocument(doc); w.commit(); doc.clear(); doc.add(new NumericDocValuesField("category", 0)); w.addDocument(doc); w.commit(); final IndexReader reader = w.getReader(); final IndexSearcher searcher = newSearcher(reader); Sort sort = new Sort(new SortField("group", SortField.Type.STRING_VAL)); final CollapsingTopDocsCollector collapsingCollector = CollapsingTopDocsCollector.createKeyword("group", sort, 10, false); searcher.search(new MatchAllDocsQuery(), collapsingCollector); CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs(); assertEquals(4, collapseTopFieldDocs.scoreDocs.length); assertEquals(4, collapseTopFieldDocs.collapseValues.length); assertNull(collapseTopFieldDocs.collapseValues[0]); assertEquals(new BytesRef("0"), collapseTopFieldDocs.collapseValues[1]); assertEquals(new BytesRef("1"), collapseTopFieldDocs.collapseValues[2]); assertEquals(new BytesRef("10"), collapseTopFieldDocs.collapseValues[3]); w.close(); reader.close(); dir.close(); } }