TestSpansAdvanced.java example

Explorer
solrcene-master
package org.apache.lucene.search.spans;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Random;

import org.apache.lucene.util.LuceneTestCase;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;

/*******************************************************************************
 * Tests the span query bug in Lucene. It demonstrates that SpanTermQuerys don't
 * work correctly in a BooleanQuery.
 * 
 */
public class TestSpansAdvanced extends LuceneTestCase {
  
  // location to the index
  protected Directory mDirectory;
  protected IndexReader reader;
  protected IndexSearcher searcher;
  protected Random random;
  
  // field names in the index
  private final static String FIELD_ID = "ID";
  protected final static String FIELD_TEXT = "TEXT";
  
  /**
   * Initializes the tests by adding 4 identical documents to the index.
   */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    random = newRandom();
    // create test index
    mDirectory = newDirectory(random);
    final RandomIndexWriter writer = new RandomIndexWriter(random,
        mDirectory, new MockAnalyzer(MockTokenizer.SIMPLE, true,
                MockTokenFilter.ENGLISH_STOPSET, true));
    addDocument(writer, "1", "I think it should work.");
    addDocument(writer, "2", "I think it should work.");
    addDocument(writer, "3", "I think it should work.");
    addDocument(writer, "4", "I think it should work.");
    reader = writer.getReader();
    writer.close();
    searcher = new IndexSearcher(reader);
  }
  
  @Override
  protected void tearDown() throws Exception {
    searcher.close();
    reader.close();
    mDirectory.close();
    mDirectory = null;
    super.tearDown();
  }
  
  /**
   * Adds the document to the index.
   * 
   * @param writer the Lucene index writer
   * @param id the unique id of the document
   * @param text the text of the document
   * @throws IOException
   */
  protected void addDocument(final RandomIndexWriter writer, final String id,
      final String text) throws IOException {
    
    final Document document = new Document();
    document.add(new Field(FIELD_ID, id, Field.Store.YES,
        Field.Index.NOT_ANALYZED));
    document.add(new Field(FIELD_TEXT, text, Field.Store.YES,
        Field.Index.ANALYZED));
    writer.addDocument(document);
  }
  
  /**
   * Tests two span queries.
   * 
   * @throws IOException
   */
  public void testBooleanQueryWithSpanQueries() throws IOException {
    
    doTestBooleanQueryWithSpanQueries(searcher, 0.3884282f);
  }
  
  /**
   * Tests two span queries.
   * 
   * @throws IOException
   */
  protected void doTestBooleanQueryWithSpanQueries(IndexSearcher s,
      final float expectedScore) throws IOException {
    
    final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "work"));
    final BooleanQuery query = new BooleanQuery();
    query.add(spanQuery, BooleanClause.Occur.MUST);
    query.add(spanQuery, BooleanClause.Occur.MUST);
    final String[] expectedIds = new String[] {"1", "2", "3", "4"};
    final float[] expectedScores = new float[] {expectedScore, expectedScore,
        expectedScore, expectedScore};
    assertHits(s, query, "two span queries", expectedIds, expectedScores);
  }
  
  /**
   * Checks to see if the hits are what we expected.
   * 
   * @param query the query to execute
   * @param description the description of the search
   * @param expectedIds the expected document ids of the hits
   * @param expectedScores the expected scores of the hits
   * 
   * @throws IOException
   */
  protected static void assertHits(Searcher s, Query query,
      final String description, final String[] expectedIds,
      final float[] expectedScores) throws IOException {
    QueryUtils.check(query, s);
    
    final float tolerance = 1e-5f;
    
    // Hits hits = searcher.search(query);
    // hits normalizes and throws things off if one score is greater than 1.0
    TopDocs topdocs = s.search(query, null, 10000);
    
    /*****
     * // display the hits System.out.println(hits.length() +
     * " hits for search: \"" + description + '\"'); for (int i = 0; i <
     * hits.length(); i++) { System.out.println("  " + FIELD_ID + ':' +
     * hits.doc(i).get(FIELD_ID) + " (score:" + hits.score(i) + ')'); }
     *****/
    
    // did we get the hits we expected
    assertEquals(expectedIds.length, topdocs.totalHits);
    for (int i = 0; i < topdocs.totalHits; i++) {
      // System.out.println(i + " exp: " + expectedIds[i]);
      // System.out.println(i + " field: " + hits.doc(i).get(FIELD_ID));
      
      int id = topdocs.scoreDocs[i].doc;
      float score = topdocs.scoreDocs[i].score;
      Document doc = s.doc(id);
      assertEquals(expectedIds[i], doc.get(FIELD_ID));
      boolean scoreEq = Math.abs(expectedScores[i] - score) < tolerance;
      if (!scoreEq) {
        System.out.println(i + " warning, expected score: " + expectedScores[i]
            + ", actual " + score);
        System.out.println(s.explain(query, id));
      }
      assertEquals(expectedScores[i], score, tolerance);
      assertEquals(s.explain(query, id).getValue(), score, tolerance);
    }
  }
  
}