FunctionQueryTest.java example

Explorer
l4ia-master
- src
  - lia
package lia.advsearching;

/**
 * Copyright Manning Publications Co.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific lan      
*/

import junit.framework.TestCase;
import java.util.Date;
import java.io.IOException;

import lia.common.TestUtil;

import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.function.FieldScoreQuery;
import org.apache.lucene.search.function.CustomScoreQuery;
import org.apache.lucene.search.function.CustomScoreProvider;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.Version;

// From chapter 5
public class FunctionQueryTest extends TestCase {

  IndexSearcher s;
  IndexWriter w;

  private void addDoc(int score, String content) throws Exception {
    Document doc = new Document();
    doc.add(new Field("score",
                      Integer.toString(score),
                      Field.Store.NO,
                      Field.Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field("content",
                      content,
                      Field.Store.NO,
                      Field.Index.ANALYZED));
    w.addDocument(doc);
  }

  public void setUp() throws Exception {
    Directory dir = new RAMDirectory();
    w = new IndexWriter(dir,
                        new StandardAnalyzer(
                                 Version.LUCENE_30),
                        IndexWriter.MaxFieldLength.UNLIMITED);
    addDoc(7, "this hat is green");
    addDoc(42, "this hat is blue");
    w.close();

    s = new IndexSearcher(dir, true);
  }

  public void tearDown() throws Exception {
    super.tearDown();
    s.close();
  }

  public void testFieldScoreQuery() throws Throwable {
    Query q = new FieldScoreQuery("score", FieldScoreQuery.Type.BYTE);
    TopDocs hits = s.search(q, 10);
    assertEquals(2, hits.scoreDocs.length);       // #1
    assertEquals(1, hits.scoreDocs[0].doc);       // #2
    assertEquals(42, (int) hits.scoreDocs[0].score);
    assertEquals(0, hits.scoreDocs[1].doc);
    assertEquals(7, (int) hits.scoreDocs[1].score);
  }

  /*
    #1 All documents match
    #2 Doc 1 is first because its static score (42) is
       higher than doc 0's (7)
  */

  public void testCustomScoreQuery() throws Throwable {
    Query q = new QueryParser(Version.LUCENE_30,
                              "content",
                              new StandardAnalyzer(
                                Version.LUCENE_30))
                 .parse("the green hat");
    FieldScoreQuery qf = new FieldScoreQuery("score",
                                             FieldScoreQuery.Type.BYTE);
    CustomScoreQuery customQ = new CustomScoreQuery(q, qf) {
      public CustomScoreProvider getCustomScoreProvider(IndexReader r) {
        return new CustomScoreProvider(r) {
          public float customScore(int doc,
                                   float subQueryScore,
                                   float valSrcScore) {
            return (float) (Math.sqrt(subQueryScore) * valSrcScore);
          }
        };
      }
    };

    TopDocs hits = s.search(customQ, 10);
    assertEquals(2, hits.scoreDocs.length);
    
    assertEquals(1, hits.scoreDocs[0].doc);           // #1
    assertEquals(0, hits.scoreDocs[1].doc);
  }

  /*
    #1 Even though document 0 is a better match to the
       original query, document 1 gets a better score after
       multiplying in its score field
   */

  static class RecencyBoostingQuery extends CustomScoreQuery {

    double multiplier;
    int today;
    int maxDaysAgo;
    String dayField;
    static int MSEC_PER_DAY = 1000*3600*24;

    public RecencyBoostingQuery(Query q, double multiplier,
                                int maxDaysAgo, String dayField) {
      super(q);
      today = (int) (new Date().getTime()/MSEC_PER_DAY);
      this.multiplier = multiplier;
      this.maxDaysAgo = maxDaysAgo;
      this.dayField = dayField;
    }

    private class RecencyBooster extends CustomScoreProvider {
      final int[] publishDay;

      public RecencyBooster(IndexReader r) throws IOException {
        super(r);
        publishDay = FieldCache.DEFAULT                       // #A
             .getInts(r, dayField);                           // #A
      }

      public float customScore(int doc, float subQueryScore,
                               float valSrcScore) {
        int daysAgo = today - publishDay[doc];                // #B
        if (daysAgo < maxDaysAgo) {                           // #C            
          float boost = (float) (multiplier *                 // #D
                                 (maxDaysAgo-daysAgo)         // #D
                                 / maxDaysAgo);               // #D
          return (float) (subQueryScore * (1.0+boost));
        } else {
          return subQueryScore;                               // #E
        }
      }
    }

    public CustomScoreProvider getCustomScoreProvider(IndexReader r) throws IOException {
      return new RecencyBooster(r);
    }
  }

  /*
    #A Retrieve days from field cache
    #B Compute elapsed days
    #C Skip old books
    #D Compute simple linear boost
    #E Return un-boosted score
  */

  public void testRecency() throws Throwable {
    Directory dir = TestUtil.getBookIndexDirectory();
    IndexReader r = IndexReader.open(dir);
    IndexSearcher s = new IndexSearcher(r);
    s.setDefaultFieldSortScoring(true, true);

    QueryParser parser = new QueryParser(
                            Version.LUCENE_30,
                            "contents",
                            new StandardAnalyzer(
                              Version.LUCENE_30));
    Query q = parser.parse("java in action");       // #A
    Query q2 = new RecencyBoostingQuery(q,          // #B
                                        2.0, 2*365,
                                        "pubmonthAsDay");
    Sort sort = new Sort(new SortField[] {
        SortField.FIELD_SCORE,
        new SortField("title2", SortField.STRING)});
    TopDocs hits = s.search(q2, null, 5, sort);

    for (int i = 0; i < hits.scoreDocs.length; i++) {
      Document doc = r.document(hits.scoreDocs[i].doc);
      System.out.println((1+i) + ": " +
                         doc.get("title") +
                         ": pubmonth=" +
                         doc.get("pubmonth") +
                         " score=" + hits.scoreDocs[i].score);
    }
    s.close();
    r.close();
    dir.close();
  }

  /*
    #A Parse query
    #B Create recency boosting query
  */
}