package lia.advsearching;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import junit.framework.TestCase;
import java.util.Date;
import java.io.IOException;
import lia.common.TestUtil;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.function.FieldScoreQuery;
import org.apache.lucene.search.function.CustomScoreQuery;
import org.apache.lucene.search.function.CustomScoreProvider;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.Version;
// From chapter 5
public class FunctionQueryTest extends TestCase {
// Searcher over the in-memory index; opened in setUp(), closed in tearDown().
IndexSearcher s;
// Writer that addDoc() uses to populate the index; setUp() creates it and
// closes it again once both documents have been added.
IndexWriter w;
/**
 * Adds one two-field document to the index through the shared writer
 * {@code w}. Neither field is stored.
 *
 * @param score   value indexed as a single un-analyzed, norm-free term in
 *                the "score" field
 * @param content text indexed (analyzed) in the "content" field
 * @throws Exception if the writer cannot add the document
 */
private void addDoc(int score, String content) throws Exception {
  Document doc = new Document();

  Field scoreField = new Field("score",
                               String.valueOf(score),
                               Field.Store.NO,
                               Field.Index.NOT_ANALYZED_NO_NORMS);
  doc.add(scoreField);

  Field contentField = new Field("content",
                                 content,
                                 Field.Store.NO,
                                 Field.Index.ANALYZED);
  doc.add(contentField);

  w.addDocument(doc);
}
/**
 * Builds a fresh in-memory index containing two documents (scores 7 and 42)
 * and opens the searcher {@code s} on it.
 *
 * @throws Exception if the index cannot be written or opened
 */
@Override
public void setUp() throws Exception {
  Directory ramDir = new RAMDirectory();
  StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

  // Populate the index, then close the writer before any searcher is opened.
  w = new IndexWriter(ramDir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
  addDoc(7, "this hat is green");
  addDoc(42, "this hat is blue");
  w.close();

  s = new IndexSearcher(ramDir, true);
}
/**
 * Closes the searcher opened by {@code setUp()} and then runs the
 * superclass teardown.
 *
 * @throws Exception if closing the searcher or the superclass teardown fails
 */
@Override
public void tearDown() throws Exception {
  try {
    // Release this fixture's own resource first ...
    if (s != null) {
      s.close();
    }
  } finally {
    // ... and always give the superclass its turn, even if close() failed.
    super.tearDown();
  }
}
/**
 * Runs a bare {@link FieldScoreQuery} over the "score" field and checks
 * that both documents come back, ordered by the value indexed in that
 * field, with that value as their score.
 */
public void testFieldScoreQuery() throws Throwable {
  // Expected hits, best first: document id and its (truncated) score.
  final int[] expectedDocs = {1, 0};      // #2
  final int[] expectedScores = {42, 7};

  Query byScoreField = new FieldScoreQuery("score", FieldScoreQuery.Type.BYTE);
  TopDocs hits = s.search(byScoreField, 10);

  assertEquals(2, hits.scoreDocs.length); // #1
  for (int rank = 0; rank < expectedDocs.length; rank++) {
    assertEquals(expectedDocs[rank], hits.scoreDocs[rank].doc);
    assertEquals(expectedScores[rank], (int) hits.scoreDocs[rank].score);
  }
}
/*
#1 All documents match
#2 Doc 1 is first because its static score (42) is
higher than doc 0's (7)
*/
/**
 * Combines a parsed text query with the per-document "score" field through
 * a {@link CustomScoreQuery} and checks the resulting hit order.
 */
public void testCustomScoreQuery() throws Throwable {
  StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
  QueryParser parser = new QueryParser(Version.LUCENE_30, "content", analyzer);
  Query textQuery = parser.parse("the green hat");

  FieldScoreQuery fieldScore =
      new FieldScoreQuery("score", FieldScoreQuery.Type.BYTE);

  CustomScoreQuery combined = new CustomScoreQuery(textQuery, fieldScore) {
    @Override
    public CustomScoreProvider getCustomScoreProvider(IndexReader reader) {
      return new CustomScoreProvider(reader) {
        // Final score = sqrt(text-query score) * value-source score.
        @Override
        public float customScore(int doc,
                                 float subQueryScore,
                                 float valSrcScore) {
          double dampedRelevance = Math.sqrt(subQueryScore);
          return (float) (dampedRelevance * valSrcScore);
        }
      };
    }
  };

  TopDocs hits = s.search(combined, 10);
  assertEquals(2, hits.scoreDocs.length);
  assertEquals(1, hits.scoreDocs[0].doc); // #1
  assertEquals(0, hits.scoreDocs[1].doc);
}
/*
#1 Even though document 0 is a better match to the
original query, document 1 gets a better score after
multiplying in its score field
*/
static class RecencyBoostingQuery extends CustomScoreQuery {
double multiplier;
int today;
int maxDaysAgo;
String dayField;
static int MSEC_PER_DAY = 1000*3600*24;
public RecencyBoostingQuery(Query q, double multiplier,
int maxDaysAgo, String dayField) {
super(q);
today = (int) (new Date().getTime()/MSEC_PER_DAY);
this.multiplier = multiplier;
this.maxDaysAgo = maxDaysAgo;
this.dayField = dayField;
}
private class RecencyBooster extends CustomScoreProvider {
final int[] publishDay;
public RecencyBooster(IndexReader r) throws IOException {
super(r);
publishDay = FieldCache.DEFAULT // #A
.getInts(r, dayField); // #A
}
public float customScore(int doc, float subQueryScore,
float valSrcScore) {
int daysAgo = today - publishDay[doc]; // #B
if (daysAgo < maxDaysAgo) { // #C
float boost = (float) (multiplier * // #D
(maxDaysAgo-daysAgo) // #D
/ maxDaysAgo); // #D
return (float) (subQueryScore * (1.0+boost));
} else {
return subQueryScore; // #E
}
}
}
public CustomScoreProvider getCustomScoreProvider(IndexReader r) throws IOException {
return new RecencyBooster(r);
}
}
/*
#A Retrieve days from field cache
#B Compute elapsed days
#C Skip old books
#D Compute simple linear boost
#E Return un-boosted score
*/
/**
 * Searches the book index for "java in action" with a recency boost and
 * prints the top five hits (title, pubmonth and score), sorted by score
 * and then by the "title2" field.
 *
 * <p>The directory, reader and searcher are released in {@code finally}
 * blocks so they are closed even when parsing or searching fails.
 */
public void testRecency() throws Throwable {
  Directory dir = TestUtil.getBookIndexDirectory();
  try {
    IndexReader reader = IndexReader.open(dir);
    try {
      // Local searcher over the book index; deliberately not the fixture
      // field "s", which searches the two-document index from setUp().
      IndexSearcher searcher = new IndexSearcher(reader);
      try {
        // Have scores computed even though the search uses a Sort, so the
        // score printed below is populated.
        searcher.setDefaultFieldSortScoring(true, true);

        QueryParser parser = new QueryParser(
                                Version.LUCENE_30,
                                "contents",
                                new StandardAnalyzer(
                                  Version.LUCENE_30));
        Query q = parser.parse("java in action");       // #A
        Query q2 = new RecencyBoostingQuery(q,          // #B
                                            2.0, 2*365,
                                            "pubmonthAsDay");
        Sort sort = new Sort(new SortField[] {
            SortField.FIELD_SCORE,
            new SortField("title2", SortField.STRING)});
        TopDocs hits = searcher.search(q2, null, 5, sort);

        for (int i = 0; i < hits.scoreDocs.length; i++) {
          Document doc = reader.document(hits.scoreDocs[i].doc);
          System.out.println((1+i) + ": " +
                             doc.get("title") +
                             ": pubmonth=" +
                             doc.get("pubmonth") +
                             " score=" + hits.scoreDocs[i].score);
        }
      } finally {
        searcher.close();
      }
    } finally {
      reader.close();
    }
  } finally {
    dir.close();
  }
}
/*
#A Parse query
#B Create recency boosting query
*/
}