/*
* Created on 25-Jan-2006
*/
package org.apache.lucene.search.similar;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similar.MoreLikeThis;
/**
* A simple wrapper for MoreLikeThis for use in scenarios where a Query object is required eg
* in custom QueryParser extensions. At query.rewrite() time the reader is used to construct the
* actual MoreLikeThis object and obtain the real Query object.
*/
public class MoreLikeThisQuery extends Query
{
private String likeText;
private String[] moreLikeFields;
private Analyzer analyzer;
float percentTermsToMatch=0.3f;
int minTermFrequency=1;
int maxQueryTerms=5;
Set<?> stopWords=null;
int minDocFreq=-1;
/**
* @param moreLikeFields
*/
public MoreLikeThisQuery(String likeText, String[] moreLikeFields, Analyzer analyzer)
{
this.likeText=likeText;
this.moreLikeFields=moreLikeFields;
this.analyzer=analyzer;
}
@Override
public Query rewrite(IndexReader reader) throws IOException
{
MoreLikeThis mlt=new MoreLikeThis(reader);
mlt.setFieldNames(moreLikeFields);
mlt.setAnalyzer(analyzer);
mlt.setMinTermFreq(minTermFrequency);
if(minDocFreq>=0)
{
mlt.setMinDocFreq(minDocFreq);
}
mlt.setMaxQueryTerms(maxQueryTerms);
mlt.setStopWords(stopWords);
BooleanQuery bq= (BooleanQuery) mlt.like(new ByteArrayInputStream(likeText.getBytes()));
BooleanClause[] clauses = bq.getClauses();
//make at least half the terms match
bq.setMinimumNumberShouldMatch((int)(clauses.length*percentTermsToMatch));
return bq;
}
/* (non-Javadoc)
* @see org.apache.lucene.search.Query#toString(java.lang.String)
*/
@Override
public String toString(String field)
{
return "like:"+likeText;
}
public float getPercentTermsToMatch() {
return percentTermsToMatch;
}
public void setPercentTermsToMatch(float percentTermsToMatch) {
this.percentTermsToMatch = percentTermsToMatch;
}
public Analyzer getAnalyzer()
{
return analyzer;
}
public void setAnalyzer(Analyzer analyzer)
{
this.analyzer = analyzer;
}
public String getLikeText()
{
return likeText;
}
public void setLikeText(String likeText)
{
this.likeText = likeText;
}
public int getMaxQueryTerms()
{
return maxQueryTerms;
}
public void setMaxQueryTerms(int maxQueryTerms)
{
this.maxQueryTerms = maxQueryTerms;
}
public int getMinTermFrequency()
{
return minTermFrequency;
}
public void setMinTermFrequency(int minTermFrequency)
{
this.minTermFrequency = minTermFrequency;
}
public String[] getMoreLikeFields()
{
return moreLikeFields;
}
public void setMoreLikeFields(String[] moreLikeFields)
{
this.moreLikeFields = moreLikeFields;
}
public Set<?> getStopWords()
{
return stopWords;
}
public void setStopWords(Set<?> stopWords)
{
this.stopWords = stopWords;
}
public int getMinDocFreq()
{
return minDocFreq;
}
public void setMinDocFreq(int minDocFreq)
{
this.minDocFreq = minDocFreq;
}
}