/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.search.suggest.document; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CyclicBarrier; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene70.Lucene70Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.suggest.BitsProducer; import org.apache.lucene.store.Directory; import org.apache.lucene.store.OutputStreamDataOutput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; import org.junit.After; import org.junit.Before; import org.junit.Test; import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc; import static org.hamcrest.core.IsEqual.equalTo; public class TestSuggestField extends LuceneTestCase { public Directory dir; @Before public void before() throws Exception { dir = newDirectory(); } @After public void after() throws Exception { dir.close(); } @Test public void testEmptySuggestion() throws Exception { IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { new SuggestField("suggest_field", "", 3); }); assertTrue(expected.getMessage().contains("value")); } @Test public void testNegativeWeight() throws Exception { IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { new SuggestField("suggest_field", "sugg", -1); }); assertTrue(expected.getMessage().contains("weight")); } @Test public void testReservedChars() throws Exception { CharsRefBuilder charsRefBuilder = new CharsRefBuilder(); charsRefBuilder.append("sugg"); charsRefBuilder.setCharAt(2, (char) CompletionAnalyzer.SEP_LABEL); IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { new SuggestField("name", charsRefBuilder.toString(), 1); }); assertTrue(expected.getMessage().contains("[0x1f]")); charsRefBuilder.setCharAt(2, (char) CompletionAnalyzer.HOLE_CHARACTER); expected = expectThrows(IllegalArgumentException.class, () -> { new SuggestField("name", charsRefBuilder.toString(), 1); }); assertTrue(expected.getMessage().contains("[0x1e]")); charsRefBuilder.setCharAt(2, (char) NRTSuggesterBuilder.END_BYTE); expected = expectThrows(IllegalArgumentException.class, () -> { new SuggestField("name", charsRefBuilder.toString(), 1); }); assertTrue(expected.getMessage().contains("[0x0]")); } @Test public void testEmpty() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field")); DirectoryReader reader = iw.getReader(); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab")); TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3, false); assertThat(lookupDocs.totalHits, equalTo(0)); reader.close(); iw.close(); } @Test public void testTokenStream() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); SuggestField suggestField = new SuggestField("field", "input", 1); BytesRef surfaceForm = new BytesRef("input"); ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); try (OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream)) { output.writeVInt(surfaceForm.length); output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length); output.writeVInt(1 + 1); output.writeByte(SuggestField.TYPE); } BytesRef payload = new BytesRef(byteArrayOutputStream.toByteArray()); TokenStream stream = new CompletionTokenStreamTest.PayloadAttrToTypeAttrFilter(suggestField.tokenStream(analyzer, null)); assertTokenStreamContents(stream, new String[] {"input"}, null, null, new String[]{payload.utf8ToString()}, new int[]{1}, null, null); CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer); stream = new CompletionTokenStreamTest.PayloadAttrToTypeAttrFilter(suggestField.tokenStream(completionAnalyzer, null)); assertTokenStreamContents(stream, new String[] {"input"}, null, null, new String[]{payload.utf8ToString()}, new int[]{1}, null, null); } @Test public void testDupSuggestFieldValues() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field")); final int num = Math.min(1000, atLeast(300)); int[] weights = new int[num]; for(int i = 0; i < num; i++) { Document document = new Document(); weights[i] = random().nextInt(Integer.MAX_VALUE); document.add(new SuggestField("suggest_field", "abc", weights[i])); iw.addDocument(document); if (usually()) { iw.commit(); } } DirectoryReader reader = iw.getReader(); Entry[] expectedEntries = new Entry[num]; Arrays.sort(weights); for (int i = 1; i <= num; i++) { expectedEntries[i - 1] = new Entry("abc", weights[num - i]); } SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc")); TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, num, false); assertSuggestions(lookupDocs, expectedEntries); reader.close(); iw.close(); } public void testDeduplication() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field")); final int num = TestUtil.nextInt(random(), 2, 20); int[] weights = new int[num]; int bestABCWeight = Integer.MIN_VALUE; int bestABDWeight = Integer.MIN_VALUE; for(int i = 0; i < num; i++) { Document document = new Document(); weights[i] = random().nextInt(Integer.MAX_VALUE); String suggestValue; boolean doABC; if (i == 0) { doABC = true; } else if (i == 1) { doABC = false; } else { doABC = random().nextBoolean(); } if (doABC) { suggestValue = "abc"; bestABCWeight = Math.max(bestABCWeight, weights[i]); } else { suggestValue = "abd"; bestABDWeight = Math.max(bestABDWeight, weights[i]); } document.add(new SuggestField("suggest_field", suggestValue, weights[i])); iw.addDocument(document); if (usually()) { iw.commit(); } } DirectoryReader reader = iw.getReader(); Entry[] expectedEntries = new Entry[2]; if (bestABDWeight > bestABCWeight) { expectedEntries[0] = new Entry("abd", bestABDWeight); expectedEntries[1] = new Entry("abc", bestABCWeight); } else { expectedEntries[0] = new Entry("abc", bestABCWeight); expectedEntries[1] = new Entry("abd", bestABDWeight); } SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a")); TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true); suggestIndexSearcher.suggest(query, collector); TopSuggestDocs lookupDocs = collector.get(); assertSuggestions(lookupDocs, expectedEntries); reader.close(); iw.close(); } public void testExtremeDeduplication() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field")); final int num = atLeast(5000); int bestWeight = Integer.MIN_VALUE; for(int i = 0; i < num; i++) { Document document = new Document(); int weight = TestUtil.nextInt(random(), 10, 100); bestWeight = Math.max(weight, bestWeight); document.add(new SuggestField("suggest_field", "abc", weight)); iw.addDocument(document); if (rarely()) { iw.commit(); } } Document document = new Document(); document.add(new SuggestField("suggest_field", "abd", 7)); iw.addDocument(document); if (random().nextBoolean()) { iw.forceMerge(1); } DirectoryReader reader = iw.getReader(); Entry[] expectedEntries = new Entry[2]; expectedEntries[0] = new Entry("abc", bestWeight); expectedEntries[1] = new Entry("abd", 7); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a")); TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true); suggestIndexSearcher.suggest(query, collector); TopSuggestDocs lookupDocs = collector.get(); assertSuggestions(lookupDocs, expectedEntries); reader.close(); iw.close(); } private static String randomSimpleString(int numDigits, int maxLen) { final int len = TestUtil.nextInt(random(), 1, maxLen); final char[] chars = new char[len]; for(int j=0;j<len;j++) { chars[j] = (char) ('a' + random().nextInt(numDigits)); } return new String(chars); } public void testRandom() throws Exception { int numDigits = TestUtil.nextInt(random(), 1, 6); Set<String> keys = new HashSet<>(); int keyCount = TestUtil.nextInt(random(), 1, 20); if (numDigits == 1) { keyCount = Math.min(9, keyCount); } while (keys.size() < keyCount) { keys.add(randomSimpleString(numDigits, 10)); } List<String> keysList = new ArrayList<>(keys); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwc = iwcWithSuggestField(analyzer, "suggest_field"); // we rely on docID order: iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); int docCount = TestUtil.nextInt(random(), 1, 200); Entry[] docs = new Entry[docCount]; for(int i=0;i<docCount;i++) { int weight = random().nextInt(40); String key = keysList.get(random().nextInt(keyCount)); //System.out.println("KEY: " + key); docs[i] = new Entry(key, null, weight, i); Document doc = new Document(); doc.add(new SuggestField("suggest_field", key, weight)); iw.addDocument(doc); if (usually()) { iw.commit(); } } DirectoryReader reader = iw.getReader(); SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader); int iters = atLeast(200); for(int iter=0;iter<iters;iter++) { String prefix = randomSimpleString(numDigits, 2); if (VERBOSE) { System.out.println("\nTEST: prefix=" + prefix); } // slow but hopefully correct suggester: List<Entry> expected = new ArrayList<>(); for(Entry doc : docs) { if (doc.output.startsWith(prefix)) { expected.add(doc); } } Collections.sort(expected, new Comparator<Entry>() { @Override public int compare(Entry a, Entry b) { // sort by higher score: int cmp = Float.compare(b.value, a.value); if (cmp == 0) { // tie break by smaller docID: cmp = Integer.compare(a.id, b.id); } return cmp; } }); boolean dedup = random().nextBoolean(); if (dedup) { List<Entry> deduped = new ArrayList<>(); Set<String> seen = new HashSet<>(); for(Entry entry : expected) { if (seen.contains(entry.output) == false) { seen.add(entry.output); deduped.add(entry); } } expected = deduped; } // TODO: re-enable this, except something is buggy about tie breaks at the topN threshold now: //int topN = TestUtil.nextInt(random(), 1, docCount+10); int topN = docCount; if (VERBOSE) { if (dedup) { System.out.println(" expected (dedup'd) topN=" + topN + ":"); } else { System.out.println(" expected topN=" + topN + ":"); } for(int i=0;i<expected.size();i++) { if (i >= topN) { System.out.println(" leftover: " + i + ": " + expected.get(i)); } else { System.out.println(" " + i + ": " + expected.get(i)); } } } expected = expected.subList(0, Math.min(topN, expected.size())); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix)); TopSuggestDocsCollector collector = new TopSuggestDocsCollector(topN, dedup); searcher.suggest(query, collector); TopSuggestDocs actual = collector.get(); if (VERBOSE) { System.out.println(" actual:"); SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs; for(int i=0;i<suggestScoreDocs.length;i++) { System.out.println(" " + i + ": " + suggestScoreDocs[i]); } } assertSuggestions(actual, expected.toArray(new Entry[expected.size()])); } reader.close(); iw.close(); } @Test public void testNRTDeletedDocFiltering() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); // using IndexWriter instead of RandomIndexWriter IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field")); int num = Math.min(1000, atLeast(10)); int numLive = 0; List<Entry> expectedEntries = new ArrayList<>(); for (int i = 0; i < num; i++) { Document document = new Document(); document.add(new SuggestField("suggest_field", "abc_" + i, num - i)); if (i % 2 == 0) { document.add(newStringField("str_field", "delete", Field.Store.YES)); } else { numLive++; expectedEntries.add(new Entry("abc_" + i, num - i)); document.add(newStringField("str_field", "no_delete", Field.Store.YES)); } iw.addDocument(document); if (usually()) { iw.commit(); } } iw.deleteDocuments(new Term("str_field", "delete")); DirectoryReader reader = DirectoryReader.open(iw); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_")); TopSuggestDocs suggest = indexSearcher.suggest(query, numLive, false); assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()])); reader.close(); iw.close(); } @Test public void testSuggestOnAllFilteredDocuments() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field")); int num = Math.min(1000, atLeast(10)); for (int i = 0; i < num; i++) { Document document = new Document(); document.add(new SuggestField("suggest_field", "abc_" + i, i)); document.add(newStringField("str_fld", "deleted", Field.Store.NO)); iw.addDocument(document); if (usually()) { iw.commit(); } } BitsProducer filter = new BitsProducer() { @Override public Bits getBits(LeafReaderContext context) throws IOException { return new Bits.MatchNoBits(context.reader().maxDoc()); } }; DirectoryReader reader = iw.getReader(); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); // no random access required; // calling suggest with filter that does not match any documents should early terminate PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter); TopSuggestDocs suggest = indexSearcher.suggest(query, num, false); assertThat(suggest.totalHits, equalTo(0)); reader.close(); iw.close(); } @Test public void testSuggestOnAllDeletedDocuments() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); // using IndexWriter instead of RandomIndexWriter IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field")); int num = Math.min(1000, atLeast(10)); for (int i = 0; i < num; i++) { Document document = new Document(); document.add(new SuggestField("suggest_field", "abc_" + i, i)); document.add(newStringField("delete", "delete", Field.Store.NO)); iw.addDocument(document); if (usually()) { iw.commit(); } } iw.deleteDocuments(new Term("delete", "delete")); DirectoryReader reader = DirectoryReader.open(iw); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_")); TopSuggestDocs suggest = indexSearcher.suggest(query, num, false); assertThat(suggest.totalHits, equalTo(0)); reader.close(); iw.close(); } @Test public void testSuggestOnMostlyDeletedDocuments() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); // using IndexWriter instead of RandomIndexWriter IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field")); int num = Math.min(1000, atLeast(10)); for (int i = 1; i <= num; i++) { Document document = new Document(); document.add(new SuggestField("suggest_field", "abc_" + i, i)); document.add(new StoredField("weight_fld", i)); document.add(new IntPoint("weight_fld", i)); iw.addDocument(document); if (usually()) { iw.commit(); } } iw.deleteDocuments(IntPoint.newRangeQuery("weight_fld", 2, Integer.MAX_VALUE)); DirectoryReader reader = DirectoryReader.open(iw); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_")); TopSuggestDocs suggest = indexSearcher.suggest(query, 1, false); assertSuggestions(suggest, new Entry("abc_1", 1)); reader.close(); iw.close(); } @Test public void testMultipleSuggestFieldsPerDoc() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "sug_field_1", "sug_field_2")); Document document = new Document(); document.add(new SuggestField("sug_field_1", "apple", 4)); document.add(new SuggestField("sug_field_2", "april", 3)); iw.addDocument(document); document = new Document(); document.add(new SuggestField("sug_field_1", "aples", 3)); document.add(new SuggestField("sug_field_2", "apartment", 2)); iw.addDocument(document); if (rarely()) { iw.commit(); } DirectoryReader reader = iw.getReader(); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("sug_field_1", "ap")); TopSuggestDocs suggestDocs1 = suggestIndexSearcher.suggest(query, 4, false); assertSuggestions(suggestDocs1, new Entry("apple", 4), new Entry("aples", 3)); query = new PrefixCompletionQuery(analyzer, new Term("sug_field_2", "ap")); TopSuggestDocs suggestDocs2 = suggestIndexSearcher.suggest(query, 4, false); assertSuggestions(suggestDocs2, new Entry("april", 3), new Entry("apartment", 2)); // check that the doc ids are consistent for (int i = 0; i < suggestDocs1.scoreDocs.length; i++) { ScoreDoc suggestScoreDoc = suggestDocs1.scoreDocs[i]; assertThat(suggestScoreDoc.doc, equalTo(suggestDocs2.scoreDocs[i].doc)); } reader.close(); iw.close(); } @Test public void testEarlyTermination() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field")); int num = Math.min(1000, atLeast(10)); // have segments of 4 documents // with descending suggestion weights // suggest should early terminate for // segments with docs having lower suggestion weights for (int i = num; i > 0; i--) { Document document = new Document(); document.add(new SuggestField("suggest_field", "abc_" + i, i)); iw.addDocument(document); if (i % 4 == 0) { iw.commit(); } } DirectoryReader reader = iw.getReader(); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_")); TopSuggestDocs suggest = indexSearcher.suggest(query, 1, false); assertSuggestions(suggest, new Entry("abc_" + num, num)); reader.close(); iw.close(); } @Test public void testMultipleSegments() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field")); int num = Math.min(1000, atLeast(10)); List<Entry> entries = new ArrayList<>(); // ensure at least some segments have no suggest field for (int i = num; i > 0; i--) { Document document = new Document(); if (random().nextInt(4) == 1) { document.add(new SuggestField("suggest_field", "abc_" + i, i)); entries.add(new Entry("abc_" + i, i)); } document.add(new StoredField("weight_fld", i)); iw.addDocument(document); if (usually()) { iw.commit(); } } DirectoryReader reader = iw.getReader(); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_")); TopSuggestDocs suggest = indexSearcher.suggest(query, (entries.size() == 0) ? 1 : entries.size(), false); assertSuggestions(suggest, entries.toArray(new Entry[entries.size()])); reader.close(); iw.close(); } @Test public void testReturnedDocID() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field")); int num = Math.min(1000, atLeast(10)); for (int i = 0; i < num; i++) { Document document = new Document(); document.add(new SuggestField("suggest_field", "abc_" + i, num)); document.add(new StoredField("int_field", i)); iw.addDocument(document); if (random().nextBoolean()) { iw.commit(); } } DirectoryReader reader = iw.getReader(); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_")); TopSuggestDocs suggest = indexSearcher.suggest(query, num, false); assertEquals(num, suggest.totalHits); for (SuggestScoreDoc suggestScoreDoc : suggest.scoreLookupDocs()) { String key = suggestScoreDoc.key.toString(); assertTrue(key.startsWith("abc_")); String substring = key.substring(4); int fieldValue = Integer.parseInt(substring); Document doc = reader.document(suggestScoreDoc.doc); assertEquals(doc.getField("int_field").numericValue().intValue(), fieldValue); } reader.close(); iw.close(); } @Test public void testScoring() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field")); int num = Math.min(1000, atLeast(100)); String[] prefixes = {"abc", "bac", "cab"}; Map<String, Integer> mappings = new HashMap<>(); for (int i = 0; i < num; i++) { Document document = new Document(); String suggest = prefixes[i % 3] + TestUtil.randomSimpleString(random(), 10) + "_" +String.valueOf(i); int weight = random().nextInt(Integer.MAX_VALUE); document.add(new SuggestField("suggest_field", suggest, weight)); mappings.put(suggest, weight); iw.addDocument(document); if (usually()) { iw.commit(); } } DirectoryReader reader = iw.getReader(); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); for (String prefix : prefixes) { PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix)); TopSuggestDocs suggest = indexSearcher.suggest(query, num, false); assertTrue(suggest.totalHits > 0); float topScore = -1; for (SuggestScoreDoc scoreDoc : suggest.scoreLookupDocs()) { if (topScore != -1) { assertTrue(topScore >= scoreDoc.score); } topScore = scoreDoc.score; assertThat((float) mappings.get(scoreDoc.key.toString()), equalTo(scoreDoc.score)); assertNotNull(mappings.remove(scoreDoc.key.toString())); } } assertThat(mappings.size(), equalTo(0)); reader.close(); iw.close(); } @Test public void testRealisticKeys() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field")); LineFileDocs lineFileDocs = new LineFileDocs(random()); int num = Math.min(1000, atLeast(100)); Map<String, Integer> mappings = new HashMap<>(); for (int i = 0; i < num; i++) { Document document = lineFileDocs.nextDoc(); String title = document.getField("title").stringValue(); int weight = random().nextInt(Integer.MAX_VALUE); Integer prevWeight = mappings.get(title); if (prevWeight == null || prevWeight < weight) { mappings.put(title, weight); } Document doc = new Document(); doc.add(new SuggestField("suggest_field", title, weight)); iw.addDocument(doc); if (rarely()) { iw.commit(); } } DirectoryReader reader = iw.getReader(); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); for (Map.Entry<String, Integer> entry : mappings.entrySet()) { String title = entry.getKey(); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", title)); TopSuggestDocs suggest = indexSearcher.suggest(query, mappings.size(), false); assertTrue(suggest.totalHits > 0); boolean matched = false; for (ScoreDoc scoreDoc : suggest.scoreDocs) { matched = Float.compare(scoreDoc.score, (float) entry.getValue()) == 0; if (matched) { break; } } assertTrue("at least one of the entries should have the score", matched); } reader.close(); iw.close(); } @Test public void testThreads() throws Exception { final Analyzer analyzer = new MockAnalyzer(random()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field_1", "suggest_field_2", "suggest_field_3")); int num = Math.min(1000, atLeast(100)); final String prefix1 = "abc1_"; final String prefix2 = "abc2_"; final String prefix3 = "abc3_"; final Entry[] entries1 = new Entry[num]; final Entry[] entries2 = new Entry[num]; final Entry[] entries3 = new Entry[num]; for (int i = 0; i < num; i++) { int weight = num - (i + 1); entries1[i] = new Entry(prefix1 + weight, weight); entries2[i] = new Entry(prefix2 + weight, weight); entries3[i] = new Entry(prefix3 + weight, weight); } for (int i = 0; i < num; i++) { Document doc = new Document(); doc.add(new SuggestField("suggest_field_1", prefix1 + i, i)); doc.add(new SuggestField("suggest_field_2", prefix2 + i, i)); doc.add(new SuggestField("suggest_field_3", prefix3 + i, i)); iw.addDocument(doc); if (rarely()) { iw.commit(); } } DirectoryReader reader = iw.getReader(); int numThreads = TestUtil.nextInt(random(), 2, 7); Thread threads[] = new Thread[numThreads]; final CyclicBarrier startingGun = new CyclicBarrier(numThreads+1); final CopyOnWriteArrayList<Throwable> errors = new CopyOnWriteArrayList<>(); final SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); for (int i = 0; i < threads.length; i++) { threads[i] = new Thread() { @Override public void run() { try { startingGun.await(); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_1", prefix1)); TopSuggestDocs suggest = indexSearcher.suggest(query, num, false); assertSuggestions(suggest, entries1); query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_2", prefix2)); suggest = indexSearcher.suggest(query, num, false); assertSuggestions(suggest, entries2); query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_3", prefix3)); suggest = indexSearcher.suggest(query, num, false); assertSuggestions(suggest, entries3); } catch (Throwable e) { errors.add(e); } } }; threads[i].start(); } startingGun.await(); for (Thread t : threads) { t.join(); } assertTrue(errors.toString(), errors.isEmpty()); reader.close(); iw.close(); } static class Entry { final String output; final float value; final String context; final int id; Entry(String output, float value) { this(output, null, value); } Entry(String output, String context, float value) { this(output, context, value, -1); } Entry(String output, String context, float value, int id) { this.output = output; this.value = value; this.context = context; this.id = id; } @Override public String toString() { return "key=" + output + " score=" + value + " context=" + context + " id=" + id; } } static void assertSuggestions(TopDocs actual, Entry... expected) { SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs; for (int i = 0; i < Math.min(expected.length, suggestScoreDocs.length); i++) { SuggestScoreDoc lookupDoc = suggestScoreDocs[i]; String msg = "Hit " + i + ": expected: " + toString(expected[i]) + " but actual: " + toString(lookupDoc); assertThat(msg, lookupDoc.key.toString(), equalTo(expected[i].output)); assertThat(msg, lookupDoc.score, equalTo(expected[i].value)); assertThat(msg, lookupDoc.context, equalTo(expected[i].context)); } assertThat(suggestScoreDocs.length, equalTo(expected.length)); } private static String toString(Entry expected) { return "key:"+ expected.output+" score:"+expected.value+" context:"+expected.context; } private static String toString(SuggestScoreDoc actual) { return "key:"+ actual.key.toString()+" score:"+actual.score+" context:"+actual.context; } static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, String... suggestFields) { return iwcWithSuggestField(analyzer, asSet(suggestFields)); } static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) { IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer); iwc.setMergePolicy(newLogMergePolicy()); Codec filterCodec = new Lucene70Codec() { PostingsFormat postingsFormat = new Completion50PostingsFormat(); @Override public PostingsFormat getPostingsFormatForField(String field) { if (suggestFields.contains(field)) { return postingsFormat; } return super.getPostingsFormatForField(field); } }; iwc.setCodec(filterCodec); return iwc; } }