package lia.searching;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific lan
*/
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import java.util.Vector;
// From chapter 3
public class ScoreTest extends TestCase {
private Directory directory;
public void setUp() throws Exception {
directory = new RAMDirectory();
}
public void tearDown() throws Exception {
directory.close();
}
public void testSimple() throws Exception {
indexSingleFieldDocs(new Field[] {new Field("contents", "x", Field.Store.YES, Field.Index.ANALYZED)});
IndexSearcher searcher = new IndexSearcher(directory);
searcher.setSimilarity(new SimpleSimilarity());
Query query = new TermQuery(new Term("contents", "x"));
Explanation explanation = searcher.explain(query, 0);
System.out.println(explanation);
TopDocs matches = searcher.search(query, 10);
assertEquals(1, matches.totalHits);
assertEquals(1F, matches.scoreDocs[0].score, 0.0);
searcher.close();
}
private void indexSingleFieldDocs(Field[] fields) throws Exception {
IndexWriter writer = new IndexWriter(directory,
new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
for (Field f : fields) {
Document doc = new Document();
doc.add(f);
writer.addDocument(doc);
}
writer.optimize();
writer.close();
}
public void testWildcard() throws Exception {
indexSingleFieldDocs(new Field[]
{ new Field("contents", "wild", Field.Store.YES, Field.Index.ANALYZED),
new Field("contents", "child", Field.Store.YES, Field.Index.ANALYZED),
new Field("contents", "mild", Field.Store.YES, Field.Index.ANALYZED),
new Field("contents", "mildew", Field.Store.YES, Field.Index.ANALYZED) });
IndexSearcher searcher = new IndexSearcher(directory);
Query query = new WildcardQuery(new Term("contents", "?ild*")); //#A
TopDocs matches = searcher.search(query, 10);
assertEquals("child no match", 3, matches.totalHits);
assertEquals("score the same", matches.scoreDocs[0].score,
matches.scoreDocs[1].score, 0.0);
assertEquals("score the same", matches.scoreDocs[1].score,
matches.scoreDocs[2].score, 0.0);
searcher.close();
}
/*
#A Construct WildcardQuery using Term
*/
public void testFuzzy() throws Exception {
indexSingleFieldDocs(new Field[] { new Field("contents",
"fuzzy",
Field.Store.YES,
Field.Index.ANALYZED),
new Field("contents",
"wuzzy",
Field.Store.YES,
Field.Index.ANALYZED)
});
IndexSearcher searcher = new IndexSearcher(directory);
Query query = new FuzzyQuery(new Term("contents", "wuzza"));
TopDocs matches = searcher.search(query, 10);
assertEquals("both close enough", 2, matches.totalHits);
assertTrue("wuzzy closer than fuzzy",
matches.scoreDocs[0].score != matches.scoreDocs[1].score);
Document doc = searcher.doc(matches.scoreDocs[0].doc);
assertEquals("wuzza bear", "wuzzy", doc.get("contents"));
searcher.close();
}
public static class SimpleSimilarity extends Similarity {
public float lengthNorm(String field, int numTerms) {
return 1.0f;
}
public float queryNorm(float sumOfSquaredWeights) {
return 1.0f;
}
public float tf(float freq) {
return freq;
}
public float sloppyFreq(int distance) {
return 2.0f;
}
public float idf(Vector terms, Searcher searcher) {
return 1.0f;
}
public float idf(int docFreq, int numDocs) {
return 1.0f;
}
public float coord(int overlap, int maxOverlap) {
return 1.0f;
}
}
}