/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.Bits;
/**
 * A query over a bag of terms that all belong to the same field. Matching
 * documents are scored by a {@code LanguageModelScorer}, which receives the
 * per-term postings, per-term collection frequencies, the collection length
 * of the field and an optional smoothing value.
 *
 * <p>Terms are added with {@link #addTerm(Term)}; the field of the first term
 * added becomes the field of the whole query.
 *
 * @author Guest
 */
public class LanguageModelQuery extends Query {

    /** Upper bound on the number of terms a single query will hold. */
    private static final int MAX_TERM_COUNT = 1024;

    /** Smoothing value handed to the scorer; only used when greater than zero. */
    private final float smooth;

    /** Field shared by every term; {@code null} until the first term is added. */
    private String field;

    /** Terms of this query, in insertion order. */
    private final List<Term> terms;

    /** Creates a query with a smoothing value of {@code 0.0f} (no smoothing argument passed to the scorer). */
    public LanguageModelQuery() {
        this(0.0f);
    }

    /**
     * Creates an empty query.
     *
     * @param smooth smoothing value; forwarded to the scorer only when it is
     *               strictly positive
     */
    public LanguageModelQuery(float smooth) {
        this.terms = new ArrayList<Term>();
        this.smooth = smooth;
    }

    /**
     * Appends a term to this query.
     *
     * <p>Once {@value #MAX_TERM_COUNT} terms are present, further terms are
     * silently ignored (deliberate best-effort: no exception is raised).
     *
     * @param term the term to add; its field must equal the field of the
     *             terms already added
     * @throws IllegalArgumentException if the term belongs to a different field
     */
    public void addTerm(Term term) {
        if (terms.size() >= MAX_TERM_COUNT) {
            // Best-effort cap: drop the term instead of failing the whole query.
            //throw new TooManyTerms();
            return;
        }
        if (terms.isEmpty()) {
            field = term.field();
        } else if (!term.field().equals(field)) {
            // Compare by value: reference comparison only works when both
            // strings happen to be the same (e.g. interned) instance.
            throw new IllegalArgumentException("All Language Model terms must be in the same field: " + term);
        }
        terms.add(term);
    }

    /**
     * Returns a copy of the terms of this query as an array.
     *
     * @return a newly allocated array holding the terms in insertion order
     */
    public Term[] getTerms() {
        return terms.toArray(new Term[terms.size()]);
    }

    /**
     * Returns the live, mutable list backing this query (not a copy).
     *
     * @return the internal term list
     */
    public List<Term> terms() {
        return terms;
    }

    /**
     * Returns an iterator over the terms of this query.
     *
     * @return an iterator over the internal term list
     */
    public final Iterator<Term> iterator() {
        return terms().iterator();
    }

    /** Weight that builds the {@code LanguageModelScorer} for one reader context. */
    protected class LanguageModelWeight extends Weight {

        /** Similarity obtained from the searcher for this query's field. */
        protected Similarity similarity;

        /**
         * @param searcher searcher whose similarity provider supplies the
         *                 similarity for this query's field
         */
        public LanguageModelWeight(IndexSearcher searcher) {
            this.similarity = searcher.getSimilarityProvider().get(field);
        }

        /**
         * Not implemented.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
            throw new UnsupportedOperationException("Not supported yet.");
        }

        /** @return the enclosing {@link LanguageModelQuery} */
        @Override
        public Query getQuery() {
            return LanguageModelQuery.this;
        }

        /** @return the boost of the enclosing query */
        @Override
        public float getValue() {
            return getBoost();
        }

        /** Intentionally a no-op: the normalization factor is ignored. */
        @Override
        public void normalize(float norm) {
            //Do Nothing?
        }

        /**
         * Collects, for every query term, its postings, document frequency and
         * total term frequency in the given reader, and wraps them in a
         * {@code LanguageModelScorer}.
         *
         * @return the scorer, or {@code null} when the reader has no terms for
         *         this query's field
         */
        @Override
        public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
            final IndexReader reader = context.reader;
            if (reader.terms(field) == null) {
                // The field is absent from this reader, so nothing can match;
                // without this guard the calls below would dereference null.
                return null;
            }
            final byte[] norms = reader.norms(field);
            // Collection length: sum of total term frequencies over the field.
            float colLen = reader.terms(field).getSumTotalTermFreq();
            PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];
            final Bits delDocs = reader.getDeletedDocs();
            for (int i = 0; i < terms.size(); i++) {
                final Term t = terms.get(i);
                float colFreq = reader.terms(field).totalTermFreq(t.bytes());
                // NOTE(review): postingsEnum may be null if the term is absent
                // from this reader - confirm LanguageModelScorer tolerates that.
                DocsEnum postingsEnum = reader.termDocsEnum(delDocs, t.field(), t.bytes());
                postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), colFreq);
            }
            if (smooth > 0) {
                return new LanguageModelScorer(this, postingsFreqs, colLen, similarity, norms, smooth);
            } else {
                return new LanguageModelScorer(this, postingsFreqs, colLen, similarity, norms);
            }
        }

        /** @return the square of the enclosing query's boost */
        @Override
        public float sumOfSquaredWeights() throws IOException {
            return getBoost() * getBoost();
        }
    }

    /** Creates the {@link LanguageModelWeight} for this query. */
    @Override
    public Weight weight(IndexSearcher searcher) throws IOException {
        return new LanguageModelWeight(searcher);
    }

    /**
     * Postings of one term together with its document frequency and its
     * collection (total term) frequency. Ordered by ascending document
     * frequency.
     */
    static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
        final DocsEnum postings;
        final int docFreq;
        final float colFreq;

        public PostingsAndFreq(DocsEnum postings, int docFreq, float colFreq) {
            this.postings = postings;
            this.docFreq = docFreq;
            this.colFreq = colFreq;
        }

        /** Orders by {@code docFreq}, using explicit comparison rather than subtraction. */
        public int compareTo(PostingsAndFreq other) {
            if (docFreq < other.docFreq) {
                return -1;
            }
            return (docFreq == other.docFreq) ? 0 : 1;
        }
    }

    /**
     * Renders the terms of this query separated by single spaces, each one
     * formatted by {@code Term.toString()}.
     *
     * @param field default field name; currently not used to alter the output
     * @return the space-separated terms, or an empty string when the query
     *         holds no terms
     */
    @Override
    public String toString(String field) {
        // The parameter shadows the instance field, so the former
        // "field.equals(field)" test was always true; the output is therefore
        // unconditional, and an empty term list no longer triggers get(-1).
        StringBuilder buffer = new StringBuilder();
        for (int i = 0; i < terms.size(); i++) {
            if (i > 0) {
                buffer.append(' ');
            }
            buffer.append(terms.get(i));
        }
        return buffer.toString();
    }
}