/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.suggest.document;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.suggest.BitsProducer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.After;
import org.junit.Before;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.apache.lucene.search.suggest.document.TestSuggestField.Entry;
import static org.apache.lucene.search.suggest.document.TestSuggestField.assertSuggestions;
import static org.apache.lucene.search.suggest.document.TestSuggestField.iwcWithSuggestField;
import static org.hamcrest.core.IsEqual.equalTo;
public class TestPrefixCompletionQuery extends LuceneTestCase {
/**
 * Test-only {@link BitsProducer} that accepts every document of a segment having at least one
 * value of a doc-values field inside the closed interval {@code [min, max]}.
 */
private static class NumericRangeBitsProducer extends BitsProducer {

  private final String field;
  private final long min;
  private final long max;

  /**
   * @param field name of the doc-values field to inspect
   * @param min smallest accepted value (inclusive)
   * @param max largest accepted value (inclusive)
   */
  public NumericRangeBitsProducer(String field, long min, long max) {
    this.field = field;
    this.min = min;
    this.max = max;
  }

  /** Renders the producer as {@code field[min..max]}. */
  @Override
  public String toString() {
    return new StringBuilder()
        .append(field)
        .append("[")
        .append(min)
        .append("..")
        .append(max)
        .append("]")
        .toString();
  }

  /** Two producers are equal when they have the same class, field name and bounds. */
  @Override
  public boolean equals(Object obj) {
    if (obj == null) {
      return false;
    }
    if (obj.getClass() != getClass()) {
      return false;
    }
    final NumericRangeBitsProducer other = (NumericRangeBitsProducer) obj;
    if (!field.equals(other.field)) {
      return false;
    }
    return min == other.min && max == other.max;
  }

  @Override
  public int hashCode() {
    return Objects.hash(
        getClass(),
        field,
        min,
        max);
  }

  /**
   * Builds a bit set sized to the segment's {@code maxDoc} in which a bit is set for each
   * document that has a value of {@code field} within {@code [min, max]}.
   *
   * @param context the segment to evaluate
   * @return the bits of the accepted documents
   * @throws IOException if reading the doc values fails
   */
  @Override
  public Bits getBits(final LeafReaderContext context) throws IOException {
    final FixedBitSet accepted = new FixedBitSet(context.reader().maxDoc());
    final SortedNumericDocValues docValues =
        DocValues.getSortedNumeric(context.reader(), field);
    for (int doc = docValues.nextDoc(); doc != NO_MORE_DOCS; doc = docValues.nextDoc()) {
      if (currentDocHasValueInRange(docValues)) {
        accepted.set(doc);
      }
    }
    return accepted;
  }

  /**
   * Reads the values of the document the iterator is positioned on, stopping at the first one
   * that lies within {@code [min, max]}.
   */
  private boolean currentDocHasValueInRange(SortedNumericDocValues docValues) throws IOException {
    int remaining = docValues.docValueCount();
    while (remaining > 0) {
      final long value = docValues.nextValue();
      if (min <= value && value <= max) {
        return true;
      }
      remaining--;
    }
    return false;
  }
}
/** Directory used by the test methods; assigned in {@link #before()} and closed in {@link #after()}. */
public Directory dir;
/** Obtains a directory from {@code newDirectory()} and stores it in {@link #dir} before each test. */
@Before
public void before() throws Exception {
  this.dir = newDirectory();
}
/**
 * Closes {@link #dir} after each test.
 *
 * <p>The null check covers the case where {@link #before()} failed before assigning the
 * directory: there is then nothing to close, and a {@link NullPointerException} here would only
 * add a second, misleading failure on top of the real one.
 */
@After
public void after() throws Exception {
  if (dir != null) {
    dir.close();
  }
}
/**
 * Indexes four suggestions spread over two documents and checks that a prefix query for
 * {@code "ab"} returns the three matching entries in descending weight order.
 */
public void testSimple() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // try-with-resources closes the reader first and then the writer, also when an assertion
  // fails, so a failing test does not leave them open when after() closes the directory
  try (RandomIndexWriter iw =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    Document document = new Document();
    document.add(new SuggestField("suggest_field", "abc", 3));
    document.add(new SuggestField("suggest_field", "abd", 4));
    document.add(new SuggestField("suggest_field", "The Foo Fighters", 2));
    iw.addDocument(document);

    document = new Document();
    document.add(new SuggestField("suggest_field", "abcdd", 5));
    iw.addDocument(document);

    if (rarely()) {
      iw.commit();
    }

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
      PrefixCompletionQuery query =
          new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"));
      TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3, false);
      assertSuggestions(lookupDocs, new Entry("abcdd", 5), new Entry("abd", 4), new Entry("abc", 3));
    }
  }
}
/**
 * Indexes {@code num} documents whose suggestion weight equals their {@code filter_int_fld}
 * value, then checks the top suggestion under three different range filters on that field.
 */
public void testMostlyFilteredOutDocuments() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // try-with-resources closes the reader first and then the writer, also when an assertion
  // fails, so a failing test does not leave them open when after() closes the directory
  try (RandomIndexWriter iw =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    int num = Math.min(1000, atLeast(10));
    for (int i = 0; i < num; i++) {
      Document document = new Document();
      document.add(new SuggestField("suggest_field", "abc_" + i, i));
      document.add(new NumericDocValuesField("filter_int_fld", i));
      iw.addDocument(document);
      if (usually()) {
        iw.commit();
      }
    }

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);

      int topScore = num / 2;
      BitsProducer filter = new NumericRangeBitsProducer("filter_int_fld", 0, topScore);
      PrefixCompletionQuery query =
          new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
      // if at most half of the top scoring documents have been filtered out
      // the search should be admissible for a single segment
      TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
      assertTrue(suggest.totalHits >= 1);
      assertThat(suggest.scoreLookupDocs()[0].key.toString(), equalTo("abc_" + topScore));
      assertThat(suggest.scoreLookupDocs()[0].score, equalTo((float) topScore));

      filter = new NumericRangeBitsProducer("filter_int_fld", 0, 0);
      query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
      // if more than half of the top scoring documents have been filtered out
      // search is not admissible, so # of suggestions requested is num instead of 1
      suggest = indexSearcher.suggest(query, num, false);
      assertSuggestions(suggest, new Entry("abc_0", 0));

      filter = new NumericRangeBitsProducer("filter_int_fld", num - 1, num - 1);
      query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
      // if only lower scoring documents are filtered out
      // search is admissible
      suggest = indexSearcher.suggest(query, 1, false);
      assertSuggestions(suggest, new Entry("abc_" + (num - 1), num - 1));
    }
  }
}
/**
 * Indexes three documents carrying a suggestion and a {@code filter_int_fld} value, then checks
 * the suggestions for prefix {@code "app"} both without a filter and with a filter that only
 * accepts documents whose value lies in {@code [5, 12]}.
 */
public void testDocFiltering() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // try-with-resources closes the reader first and then the writer, also when an assertion
  // fails, so a failing test does not leave them open when after() closes the directory
  try (RandomIndexWriter iw =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    Document document = new Document();
    document.add(new NumericDocValuesField("filter_int_fld", 9));
    document.add(new SuggestField("suggest_field", "apples", 3));
    iw.addDocument(document);

    document = new Document();
    document.add(new NumericDocValuesField("filter_int_fld", 10));
    document.add(new SuggestField("suggest_field", "applle", 4));
    iw.addDocument(document);

    document = new Document();
    document.add(new NumericDocValuesField("filter_int_fld", 4));
    document.add(new SuggestField("suggest_field", "apple", 5));
    iw.addDocument(document);

    if (rarely()) {
      iw.commit();
    }

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);

      // suggest without filter
      PrefixCompletionQuery query =
          new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
      TopSuggestDocs suggest = indexSearcher.suggest(query, 3, false);
      assertSuggestions(suggest, new Entry("apple", 5), new Entry("applle", 4), new Entry("apples", 3));

      // suggest with filter: the document with value 4 ("apple") is outside [5, 12]
      BitsProducer filter = new NumericRangeBitsProducer("filter_int_fld", 5, 12);
      query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"), filter);
      suggest = indexSearcher.suggest(query, 3, false);
      assertSuggestions(suggest, new Entry("applle", 4), new Entry("apples", 3));
    }
  }
}
/**
 * Indexes four suggestions through a {@link CompletionAnalyzer} constructed with both boolean
 * options set to {@code false} and checks which of them match the prefixes {@code "fo"} and
 * {@code "foob"}.
 */
public void testAnalyzerWithoutPreservePosAndSep() throws Exception {
  // single definition of the field name used for indexing and for both queries
  final String field = "suggest_field_no_p_sep_or_pos_inc";
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
  CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, false);
  // try-with-resources closes the reader first and then the writer, also when an assertion
  // fails, so a failing test does not leave them open when after() closes the directory
  try (RandomIndexWriter iw =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(completionAnalyzer, field))) {
    Document document = new Document();
    document.add(new SuggestField(field, "foobar", 7));
    document.add(new SuggestField(field, "foo bar", 8));
    document.add(new SuggestField(field, "the fo", 9));
    document.add(new SuggestField(field, "the foo bar", 10));
    iw.addDocument(document);

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
      // NOTE(review): the queries are built with the raw analyzer, not completionAnalyzer;
      // presumably PrefixCompletionQuery wraps it itself - confirm this is intentional
      CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term(field, "fo"));
      TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); // all 4
      assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));

      query = new PrefixCompletionQuery(analyzer, new Term(field, "foob"));
      suggest = indexSearcher.suggest(query, 4, false); // not the fo
      assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7));
    }
  }
}
/**
 * Indexes four suggestions through a {@link CompletionAnalyzer} constructed with the options
 * {@code (true, false)} and checks which of them match the prefixes {@code "fo"} and
 * {@code "foob"}.
 */
public void testAnalyzerWithSepAndNoPreservePos() throws Exception {
  // single definition of the field name used for indexing and for both queries
  final String field = "suggest_field_no_p_pos_inc";
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
  CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, true, false);
  // try-with-resources closes the reader first and then the writer, also when an assertion
  // fails, so a failing test does not leave them open when after() closes the directory
  try (RandomIndexWriter iw =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(completionAnalyzer, field))) {
    Document document = new Document();
    document.add(new SuggestField(field, "foobar", 7));
    document.add(new SuggestField(field, "foo bar", 8));
    document.add(new SuggestField(field, "the fo", 9));
    document.add(new SuggestField(field, "the foo bar", 10));
    iw.addDocument(document);

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
      // NOTE(review): the queries are built with the raw analyzer, not completionAnalyzer;
      // presumably PrefixCompletionQuery wraps it itself - confirm this is intentional
      CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term(field, "fo"));
      TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); // matches all 4
      assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));

      query = new PrefixCompletionQuery(analyzer, new Term(field, "foob"));
      suggest = indexSearcher.suggest(query, 4, false); // only foobar
      assertSuggestions(suggest, new Entry("foobar", 7));
    }
  }
}
/**
 * Indexes four suggestions through a {@link CompletionAnalyzer} constructed with the options
 * {@code (false, true)} and checks which of them match the prefixes {@code "fo"} and
 * {@code "foob"}.
 */
public void testAnalyzerWithPreservePosAndNoSep() throws Exception {
  // single definition of the field name used for indexing and for both queries
  final String field = "suggest_field_no_p_sep";
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
  CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, true);
  // try-with-resources closes the reader first and then the writer, also when an assertion
  // fails, so a failing test does not leave them open when after() closes the directory
  try (RandomIndexWriter iw =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(completionAnalyzer, field))) {
    Document document = new Document();
    document.add(new SuggestField(field, "foobar", 7));
    document.add(new SuggestField(field, "foo bar", 8));
    document.add(new SuggestField(field, "the fo", 9));
    document.add(new SuggestField(field, "the foo bar", 10));
    iw.addDocument(document);

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
      // NOTE(review): the queries are built with the raw analyzer, not completionAnalyzer;
      // presumably PrefixCompletionQuery wraps it itself - confirm this is intentional
      CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term(field, "fo"));
      TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); // matches all 4
      assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));

      query = new PrefixCompletionQuery(analyzer, new Term(field, "foob"));
      suggest = indexSearcher.suggest(query, 4, false); // except the fo
      assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7));
    }
  }
}
/**
 * Checks that suggesting on a field whose only document has been deleted and merged away
 * yields zero hits, while a second suggest field in the same index still returns its entry.
 */
public void testGhostField() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // try-with-resources closes the reader first and then the writer, also when an assertion
  // fails, so a failing test does not leave them open when after() closes the directory
  try (IndexWriter iw =
      new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field", "suggest_field2", "suggest_field3"))) {
    Document document = new Document();
    document.add(new StringField("id", "0", Field.Store.NO));
    document.add(new SuggestField("suggest_field", "apples", 3));
    iw.addDocument(document);
    // need another document so whole segment isn't deleted
    iw.addDocument(new Document());
    iw.commit();

    document = new Document();
    document.add(new StringField("id", "1", Field.Store.NO));
    document.add(new SuggestField("suggest_field2", "apples", 3));
    iw.addDocument(document);
    iw.commit();

    iw.deleteDocuments(new Term("id", "0"));
    // first force merge is OK
    iw.forceMerge(1);

    // second force merge causes MultiFields to include "suggest_field" in its iteration, yet a
    // null Terms is returned (no documents have this field anymore)
    iw.addDocument(new Document());
    iw.forceMerge(1);

    try (DirectoryReader reader = DirectoryReader.open(iw)) {
      SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);

      PrefixCompletionQuery query =
          new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
      assertEquals(0, indexSearcher.suggest(query, 3, false).totalHits);

      query = new PrefixCompletionQuery(analyzer, new Term("suggest_field2", "app"));
      assertSuggestions(indexSearcher.suggest(query, 3, false), new Entry("apples", 3));
    }
  }
}
}