TestFieldNormModifier.java example

Explorer
solrcene-master
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

/**
 * Tests changing of field norms with a custom similarity and with fake norms.
 */
public class TestFieldNormModifier extends LuceneTestCase {
  
  public TestFieldNormModifier(String name) {
    super(name);
  }
   
  public static int NUM_DOCS = 5;
  
  public Directory store;
  
  /** inverts the normal notion of lengthNorm */
  public static Similarity s = new DefaultSimilarity() {
    @Override
    public float lengthNorm(String fieldName, int numTokens) {
      return numTokens;
    }
  };
  
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    Random random = newRandom();
    store = newDirectory(random);
    IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(random,
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    
    for (int i = 0; i < NUM_DOCS; i++) {
      Document d = new Document();
      d.add(new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED));
      d.add(new Field("nonorm", "word", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("untokfield", "20061212 20071212", Field.Store.YES, Field.Index.ANALYZED));
      
      for (int j = 1; j <= i; j++) {
        d.add(new Field("field", "crap", Field.Store.YES, Field.Index.ANALYZED));
        d.add(new Field("nonorm", "more words", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
      }
      writer.addDocument(d);
    }
    writer.close();
  }
  
  @Override
  protected void tearDown() throws Exception {
    store.close();
    super.tearDown();
  }
  
  public void testMissingField() throws Exception {
    FieldNormModifier fnm = new FieldNormModifier(store, s);
    fnm.reSetNorms("nobodyherebutuschickens");
  }
  
  public void testFieldWithNoNorm() throws Exception {
    
    IndexReader r = IndexReader.open(store, false);
    byte[] norms = r.norms("nonorm");
    
    // sanity check, norms should all be 1
    assertTrue("Whoops we have norms?", !r.hasNorms("nonorm"));
    assertNull(norms);
    
    r.close();
    
    FieldNormModifier fnm = new FieldNormModifier(store, s);
    fnm.reSetNorms("nonorm");
    
    // nothing should have changed
    r = IndexReader.open(store, false);
    
    norms = r.norms("nonorm");
    assertTrue("Whoops we have norms?", !r.hasNorms("nonorm"));
    assertNull(norms);

    r.close();
  }
  
  
  public void testGoodCases() throws Exception {
    
    IndexSearcher searcher = new IndexSearcher(store, true);
    final float[] scores = new float[NUM_DOCS];
    float lastScore = 0.0f;
    
    // default similarity should put docs with shorter length first
    searcher.search(new TermQuery(new Term("field", "word")), new Collector() {
      private int docBase = 0;
      private Scorer scorer;
      
      @Override
      public final void collect(int doc) throws IOException {
        scores[doc + docBase] = scorer.score();
      }
      @Override
      public void setNextReader(IndexReader reader, int docBase) {
        this.docBase = docBase;
      }
      @Override
      public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
      }
      @Override
      public boolean acceptsDocsOutOfOrder() {
        return true;
      }
    });
    searcher.close();
    
    lastScore = Float.MAX_VALUE;
    for (int i = 0; i < NUM_DOCS; i++) {
      String msg = "i=" + i + ", " + scores[i] + " <= " + lastScore;
      assertTrue(msg, scores[i] <= lastScore);
      //System.out.println(msg);
      lastScore = scores[i];
    }

    FieldNormModifier fnm = new FieldNormModifier(store, s);
    fnm.reSetNorms("field");
    
    // new norm (with default similarity) should put longer docs first
    searcher = new IndexSearcher(store, true);
    searcher.search(new TermQuery(new Term("field", "word")),  new Collector() {
      private int docBase = 0;
      private Scorer scorer;
      @Override
      public final void collect(int doc) throws IOException {
        scores[doc + docBase] = scorer.score();
      }
      @Override
      public void setNextReader(IndexReader reader, int docBase) {
        this.docBase = docBase;
      }
      @Override
      public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
      }
      @Override
      public boolean acceptsDocsOutOfOrder() {
        return true;
      }
    });
    searcher.close();
    
    lastScore = 0.0f;
    for (int i = 0; i < NUM_DOCS; i++) {
      String msg = "i=" + i + ", " + scores[i] + " >= " + lastScore;
      assertTrue(msg, scores[i] >= lastScore);
      //System.out.println(msg);
      lastScore = scores[i];
    }
  }

  public void testNormKiller() throws IOException {

    IndexReader r = IndexReader.open(store, false);
    byte[] oldNorms = r.norms("untokfield");    
    r.close();
    
    FieldNormModifier fnm = new FieldNormModifier(store, s);
    fnm.reSetNorms("untokfield");

    r = IndexReader.open(store, false);
    byte[] newNorms = r.norms("untokfield");
    r.close();
    assertFalse(Arrays.equals(oldNorms, newNorms));    

    
    // verify that we still get documents in the same order as originally
    IndexSearcher searcher = new IndexSearcher(store, true);
    final float[] scores = new float[NUM_DOCS];
    float lastScore = 0.0f;
    
    // default similarity should return the same score for all documents for this query
    searcher.search(new TermQuery(new Term("untokfield", "20061212")), new Collector() {
      private int docBase = 0;
      private Scorer scorer;
      @Override
      public final void collect(int doc) throws IOException {
        scores[doc + docBase] = scorer.score();
      }
      @Override
      public void setNextReader(IndexReader reader, int docBase) {
        this.docBase = docBase;
      }
      @Override
      public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
      }
      @Override
      public boolean acceptsDocsOutOfOrder() {
        return true;
      }
    });
    searcher.close();
    
    lastScore = scores[0];
    for (int i = 0; i < NUM_DOCS; i++) {
      String msg = "i=" + i + ", " + scores[i] + " == " + lastScore;
      assertTrue(msg, scores[i] == lastScore);
      //System.out.println(msg);
      lastScore = scores[i];
    }
  }
}