package org.apache.lucene.search.highlight;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.StringHelper;
/**
* {@link Scorer} implementation which scores text fragments by the number of
* unique query terms found. This class converts appropriate {@link Query}s to
* {@link SpanQuery}s and attempts to score only those terms that participated in
* generating the 'hit' on the document.
*/
public class QueryScorer implements Scorer {
private float totalScore;
private Set<String> foundTerms;
private Map<String,WeightedSpanTerm> fieldWeightedSpanTerms;
private float maxTermWeight;
private int position = -1;
private String defaultField;
private CharTermAttribute termAtt;
private PositionIncrementAttribute posIncAtt;
private boolean expandMultiTermQuery = true;
private Query query;
private String field;
private IndexReader reader;
private boolean skipInitExtractor;
private boolean wrapToCaching = true;
/**
* @param query Query to use for highlighting
*/
public QueryScorer(Query query) {
init(query, null, null, true);
}
/**
* @param query Query to use for highlighting
* @param field Field to highlight - pass null to ignore fields
*/
public QueryScorer(Query query, String field) {
init(query, field, null, true);
}
/**
* @param query Query to use for highlighting
* @param field Field to highlight - pass null to ignore fields
* @param reader {@link IndexReader} to use for quasi tf/idf scoring
*/
public QueryScorer(Query query, IndexReader reader, String field) {
init(query, field, reader, true);
}
/**
* @param query to use for highlighting
* @param reader {@link IndexReader} to use for quasi tf/idf scoring
* @param field to highlight - pass null to ignore fields
* @param defaultField
*/
public QueryScorer(Query query, IndexReader reader, String field, String defaultField) {
this.defaultField = StringHelper.intern(defaultField);
init(query, field, reader, true);
}
/**
* @param defaultField - The default field for queries with the field name unspecified
*/
public QueryScorer(Query query, String field, String defaultField) {
this.defaultField = StringHelper.intern(defaultField);
init(query, field, null, true);
}
/**
* @param weightedTerms an array of pre-created {@link WeightedSpanTerm}s
*/
public QueryScorer(WeightedSpanTerm[] weightedTerms) {
this.fieldWeightedSpanTerms = new HashMap<String,WeightedSpanTerm>(weightedTerms.length);
for (int i = 0; i < weightedTerms.length; i++) {
WeightedSpanTerm existingTerm = fieldWeightedSpanTerms.get(weightedTerms[i].term);
if ((existingTerm == null) ||
(existingTerm.weight < weightedTerms[i].weight)) {
// if a term is defined more than once, always use the highest
// scoring weight
fieldWeightedSpanTerms.put(weightedTerms[i].term, weightedTerms[i]);
maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
}
}
skipInitExtractor = true;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
*/
public float getFragmentScore() {
return totalScore;
}
/**
*
* @return The highest weighted term (useful for passing to
* GradientFormatter to set top end of coloring scale).
*/
public float getMaxTermWeight() {
return maxTermWeight;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.highlight.Scorer#getTokenScore(org.apache.lucene.analysis.Token,
* int)
*/
public float getTokenScore() {
position += posIncAtt.getPositionIncrement();
String termText = termAtt.toString();
WeightedSpanTerm weightedSpanTerm;
if ((weightedSpanTerm = fieldWeightedSpanTerms.get(
termText)) == null) {
return 0;
}
if (weightedSpanTerm.positionSensitive &&
!weightedSpanTerm.checkPosition(position)) {
return 0;
}
float score = weightedSpanTerm.getWeight();
// found a query term - is it unique in this doc?
if (!foundTerms.contains(termText)) {
totalScore += score;
foundTerms.add(termText);
}
return score;
}
/* (non-Javadoc)
* @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
*/
public TokenStream init(TokenStream tokenStream) throws IOException {
position = -1;
termAtt = tokenStream.addAttribute(CharTermAttribute.class);
posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
if(!skipInitExtractor) {
if(fieldWeightedSpanTerms != null) {
fieldWeightedSpanTerms.clear();
}
return initExtractor(tokenStream);
}
return null;
}
/**
* Retrieve the {@link WeightedSpanTerm} for the specified token. Useful for passing
* Span information to a {@link Fragmenter}.
*
* @param token to get {@link WeightedSpanTerm} for
* @return WeightedSpanTerm for token
*/
public WeightedSpanTerm getWeightedSpanTerm(String token) {
return fieldWeightedSpanTerms.get(token);
}
/**
*/
private void init(Query query, String field, IndexReader reader, boolean expandMultiTermQuery) {
this.reader = reader;
this.expandMultiTermQuery = expandMultiTermQuery;
this.query = query;
this.field = field;
}
private TokenStream initExtractor(TokenStream tokenStream) throws IOException {
WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor()
: new WeightedSpanTermExtractor(defaultField);
qse.setExpandMultiTermQuery(expandMultiTermQuery);
qse.setWrapIfNotCachingTokenFilter(wrapToCaching);
if (reader == null) {
this.fieldWeightedSpanTerms = qse.getWeightedSpanTerms(query,
tokenStream, field);
} else {
this.fieldWeightedSpanTerms = qse.getWeightedSpanTermsWithScores(query,
tokenStream, field, reader);
}
if(qse.isCachedTokenStream()) {
return qse.getTokenStream();
}
return null;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
*/
public void startFragment(TextFragment newFragment) {
foundTerms = new HashSet<String>();
totalScore = 0;
}
/**
* @return true if multi-term queries should be expanded
*/
public boolean isExpandMultiTermQuery() {
return expandMultiTermQuery;
}
/**
* Controls whether or not multi-term queries are expanded
* against a {@link MemoryIndex} {@link IndexReader}.
*
* @param expandMultiTermQuery true if multi-term queries should be expanded
*/
public void setExpandMultiTermQuery(boolean expandMultiTermQuery) {
this.expandMultiTermQuery = expandMultiTermQuery;
}
/**
* By default, {@link TokenStream}s that are not of the type
* {@link CachingTokenFilter} are wrapped in a {@link CachingTokenFilter} to
* ensure an efficient reset - if you are already using a different caching
* {@link TokenStream} impl and you don't want it to be wrapped, set this to
* false.
*
* @param wrap
*/
public void setWrapIfNotCachingTokenFilter(boolean wrap) {
this.wrapToCaching = wrap;
}
}