/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.search; import java.io.IOException; import java.util.Arrays; import java.util.Comparator; import java.util.Set; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; public class TestQueryRescorer extends LuceneTestCase { private IndexSearcher getSearcher(IndexReader r) { IndexSearcher searcher = newSearcher(r); // We rely on more tokens = lower score: searcher.setSimilarity(new ClassicSimilarity()); return searcher; } public static IndexWriterConfig newIndexWriterConfig() { // We rely on more tokens = lower score: return LuceneTestCase.newIndexWriterConfig().setSimilarity(new ClassicSimilarity()); } public void testBasic() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()); Document doc = new Document(); doc.add(newStringField("id", "0", Field.Store.YES)); doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO)); w.addDocument(doc); doc = new Document(); doc.add(newStringField("id", "1", Field.Store.YES)); // 1 extra token, but wizard and oz are close; doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO)); w.addDocument(doc); IndexReader r = w.getReader(); w.close(); // Do ordinary BooleanQuery: BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD); bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD); IndexSearcher searcher = getSearcher(r); searcher.setSimilarity(new ClassicSimilarity()); TopDocs hits = searcher.search(bq.build(), 10); assertEquals(2, hits.totalHits); assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); // Now, resort using PhraseQuery: PhraseQuery pq = new PhraseQuery(5, "field", "wizard", "oz"); TopDocs hits2 = QueryRescorer.rescore(searcher, hits, pq, 2.0, 10); // Resorting changed the order: assertEquals(2, hits2.totalHits); assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id")); assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id")); // Resort using SpanNearQuery: SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard")); SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz")); SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] {t1, t2}, 0, true); TopDocs hits3 = QueryRescorer.rescore(searcher, hits, snq, 2.0, 10); // Resorting changed the order: assertEquals(2, hits3.totalHits); assertEquals("1", searcher.doc(hits3.scoreDocs[0].doc).get("id")); assertEquals("0", searcher.doc(hits3.scoreDocs[1].doc).get("id")); r.close(); dir.close(); } // Test LUCENE-5682 public void testNullScorerTermQuery() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()); Document doc = new Document(); doc.add(newStringField("id", "0", Field.Store.YES)); doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO)); w.addDocument(doc); doc = new Document(); doc.add(newStringField("id", "1", Field.Store.YES)); // 1 extra token, but wizard and oz are close; doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO)); w.addDocument(doc); IndexReader r = w.getReader(); w.close(); // Do ordinary BooleanQuery: BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD); bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD); IndexSearcher searcher = getSearcher(r); searcher.setSimilarity(new ClassicSimilarity()); TopDocs hits = searcher.search(bq.build(), 10); assertEquals(2, hits.totalHits); assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); // Now, resort using TermQuery on term that does not exist. TermQuery tq = new TermQuery(new Term("field", "gold")); TopDocs hits2 = QueryRescorer.rescore(searcher, hits, tq, 2.0, 10); // Just testing that null scorer is handled. assertEquals(2, hits2.totalHits); r.close(); dir.close(); } public void testCustomCombine() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()); Document doc = new Document(); doc.add(newStringField("id", "0", Field.Store.YES)); doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO)); w.addDocument(doc); doc = new Document(); doc.add(newStringField("id", "1", Field.Store.YES)); // 1 extra token, but wizard and oz are close; doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO)); w.addDocument(doc); IndexReader r = w.getReader(); w.close(); // Do ordinary BooleanQuery: BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD); bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD); IndexSearcher searcher = getSearcher(r); TopDocs hits = searcher.search(bq.build(), 10); assertEquals(2, hits.totalHits); assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); // Now, resort using PhraseQuery, but with an // opposite-world combine: PhraseQuery pq = new PhraseQuery(5, "field", "wizard", "oz"); TopDocs hits2 = new QueryRescorer(pq) { @Override protected float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore) { float score = firstPassScore; if (secondPassMatches) { score -= 2.0 * secondPassScore; } return score; } }.rescore(searcher, hits, 10); // Resorting didn't change the order: assertEquals(2, hits2.totalHits); assertEquals("0", searcher.doc(hits2.scoreDocs[0].doc).get("id")); assertEquals("1", searcher.doc(hits2.scoreDocs[1].doc).get("id")); r.close(); dir.close(); } public void testExplain() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()); Document doc = new Document(); doc.add(newStringField("id", "0", Field.Store.YES)); doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO)); w.addDocument(doc); doc = new Document(); doc.add(newStringField("id", "1", Field.Store.YES)); // 1 extra token, but wizard and oz are close; doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO)); w.addDocument(doc); IndexReader r = w.getReader(); w.close(); // Do ordinary BooleanQuery: BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD); bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD); IndexSearcher searcher = getSearcher(r); TopDocs hits = searcher.search(bq.build(), 10); assertEquals(2, hits.totalHits); assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); // Now, resort using PhraseQuery: PhraseQuery pq = new PhraseQuery("field", "wizard", "oz"); Rescorer rescorer = new QueryRescorer(pq) { @Override protected float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore) { float score = firstPassScore; if (secondPassMatches) { score += 2.0 * secondPassScore; } return score; } }; TopDocs hits2 = rescorer.rescore(searcher, hits, 10); // Resorting changed the order: assertEquals(2, hits2.totalHits); assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id")); assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id")); int docID = hits2.scoreDocs[0].doc; Explanation explain = rescorer.explain(searcher, searcher.explain(bq.build(), docID), docID); String s = explain.toString(); assertTrue(s.contains("TestQueryRescorer$")); assertTrue(s.contains("combined first and second pass score")); assertTrue(s.contains("first pass score")); assertTrue(s.contains("= second pass score")); assertEquals(hits2.scoreDocs[0].score, explain.getValue(), 0.0f); docID = hits2.scoreDocs[1].doc; explain = rescorer.explain(searcher, searcher.explain(bq.build(), docID), docID); s = explain.toString(); assertTrue(s.contains("TestQueryRescorer$")); assertTrue(s.contains("combined first and second pass score")); assertTrue(s.contains("first pass score")); assertTrue(s.contains("no second pass score")); assertFalse(s.contains("= second pass score")); assertEquals(hits2.scoreDocs[1].score, explain.getValue(), 0.0f); r.close(); dir.close(); } public void testMissingSecondPassScore() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()); Document doc = new Document(); doc.add(newStringField("id", "0", Field.Store.YES)); doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO)); w.addDocument(doc); doc = new Document(); doc.add(newStringField("id", "1", Field.Store.YES)); // 1 extra token, but wizard and oz are close; doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO)); w.addDocument(doc); IndexReader r = w.getReader(); w.close(); // Do ordinary BooleanQuery: BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD); bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD); IndexSearcher searcher = getSearcher(r); TopDocs hits = searcher.search(bq.build(), 10); assertEquals(2, hits.totalHits); assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); // Now, resort using PhraseQuery, no slop: PhraseQuery pq = new PhraseQuery("field", "wizard", "oz"); TopDocs hits2 = QueryRescorer.rescore(searcher, hits, pq, 2.0, 10); // Resorting changed the order: assertEquals(2, hits2.totalHits); assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id")); assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id")); // Resort using SpanNearQuery: SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard")); SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz")); SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] {t1, t2}, 0, true); TopDocs hits3 = QueryRescorer.rescore(searcher, hits, snq, 2.0, 10); // Resorting changed the order: assertEquals(2, hits3.totalHits); assertEquals("1", searcher.doc(hits3.scoreDocs[0].doc).get("id")); assertEquals("0", searcher.doc(hits3.scoreDocs[1].doc).get("id")); r.close(); dir.close(); } public void testRandom() throws Exception { Directory dir = newDirectory(); int numDocs = atLeast(1000); RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()); final int[] idToNum = new int[numDocs]; int maxValue = TestUtil.nextInt(random(), 10, 1000000); for(int i=0;i<numDocs;i++) { Document doc = new Document(); doc.add(newStringField("id", ""+i, Field.Store.YES)); int numTokens = TestUtil.nextInt(random(), 1, 10); StringBuilder b = new StringBuilder(); for(int j=0;j<numTokens;j++) { b.append("a "); } doc.add(newTextField("field", b.toString(), Field.Store.NO)); idToNum[i] = random().nextInt(maxValue); doc.add(new NumericDocValuesField("num", idToNum[i])); w.addDocument(doc); } final IndexReader r = w.getReader(); w.close(); IndexSearcher s = newSearcher(r); int numHits = TestUtil.nextInt(random(), 1, numDocs); boolean reverse = random().nextBoolean(); //System.out.println("numHits=" + numHits + " reverse=" + reverse); TopDocs hits = s.search(new TermQuery(new Term("field", "a")), numHits); TopDocs hits2 = new QueryRescorer(new FixedScoreQuery(idToNum, reverse)) { @Override protected float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore) { return secondPassScore; } }.rescore(s, hits, numHits); Integer[] expected = new Integer[numHits]; for(int i=0;i<numHits;i++) { expected[i] = hits.scoreDocs[i].doc; } final int reverseInt = reverse ? -1 : 1; Arrays.sort(expected, new Comparator<Integer>() { @Override public int compare(Integer a, Integer b) { try { int av = idToNum[Integer.parseInt(r.document(a).get("id"))]; int bv = idToNum[Integer.parseInt(r.document(b).get("id"))]; if (av < bv) { return -reverseInt; } else if (bv < av) { return reverseInt; } else { // Tie break by docID, ascending return a - b; } } catch (IOException ioe) { throw new RuntimeException(ioe); } } }); boolean fail = false; for(int i=0;i<numHits;i++) { //System.out.println("expected=" + expected[i] + " vs " + hits2.scoreDocs[i].doc + " v=" + idToNum[Integer.parseInt(r.document(expected[i]).get("id"))]); if (expected[i].intValue() != hits2.scoreDocs[i].doc) { //System.out.println(" diff!"); fail = true; } } assertFalse(fail); r.close(); dir.close(); } /** Just assigns score == idToNum[doc("id")] for each doc. */ private static class FixedScoreQuery extends Query { private final int[] idToNum; private final boolean reverse; public FixedScoreQuery(int[] idToNum, boolean reverse) { this.idToNum = idToNum; this.reverse = reverse; } @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { return new Weight(FixedScoreQuery.this) { @Override public void extractTerms(Set<Term> terms) { } @Override public Scorer scorer(final LeafReaderContext context) throws IOException { return new Scorer(null) { int docID = -1; @Override public int docID() { return docID; } @Override public int freq() { return 1; } @Override public DocIdSetIterator iterator() { return new DocIdSetIterator() { @Override public int docID() { return docID; } @Override public long cost() { return 1; } @Override public int nextDoc() { docID++; if (docID >= context.reader().maxDoc()) { return NO_MORE_DOCS; } return docID; } @Override public int advance(int target) { docID = target; return docID; } }; } @Override public float score() throws IOException { int num = idToNum[Integer.parseInt(context.reader().document(docID).get("id"))]; if (reverse) { //System.out.println("score doc=" + docID + " num=" + num); return num; } else { //System.out.println("score doc=" + docID + " num=" + -num); return -num; } } }; } @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { return null; } }; } @Override public String toString(String field) { return "FixedScoreQuery " + idToNum.length + " ids; reverse=" + reverse; } @Override public boolean equals(Object other) { return sameClassAs(other) && equalsTo(getClass().cast(other)); } private boolean equalsTo(FixedScoreQuery other) { return reverse == other.reverse && Arrays.equals(idToNum, other.idToNum); } @Override public int hashCode() { int hash = classHash(); hash = 31 * hash + (reverse ? 0 : 1); hash = 31 * hash + Arrays.hashCode(idToNum); return hash; } @Override public Query clone() { return new FixedScoreQuery(idToNum, reverse); } } }