/* * Copyright 2004-2009 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.compass.core.lucene.engine; import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.index.TermPositionVector; import org.apache.lucene.search.Query; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.Scorer; import org.apache.lucene.search.highlight.TokenSources; import org.compass.core.CompassHighlighter; import org.compass.core.Resource; import org.compass.core.engine.SearchEngineException; import org.compass.core.engine.SearchEngineHighlighter; import org.compass.core.lucene.LuceneEnvironment; import org.compass.core.lucene.LuceneResource; import org.compass.core.lucene.engine.analyzer.LuceneAnalyzerManager; import org.compass.core.lucene.engine.highlighter.LuceneHighlighterManager; import org.compass.core.lucene.engine.highlighter.LuceneHighlighterSettings; import org.compass.core.lucene.engine.highlighter.support.TokenOrderingFilter; /** * @author kimchy */ public class LuceneSearchEngineHighlighter implements SearchEngineHighlighter, LuceneDelegatedClose { private IndexReader indexReader; private boolean closed; private Query query; private LuceneHighlighterSettings highlighterSettings; private LuceneAnalyzerManager analyzerManager; private LuceneHighlighterManager highlighterManager; private int maxNumFragments = -1; private Analyzer analyzer; private String separator; private int maxBytesToAnalyze = -1; private CompassHighlighter.TextTokenizer textTokenizer; public LuceneSearchEngineHighlighter(Query query, IndexReader indexReader, LuceneSearchEngine searchEngine) throws SearchEngineException { this.indexReader = indexReader; this.highlighterManager = searchEngine.getSearchEngineFactory().getHighlighterManager(); this.highlighterSettings = highlighterManager.getDefaultHighlighterSettings(); this.analyzerManager = searchEngine.getSearchEngineFactory().getAnalyzerManager(); if (highlighterSettings.isRewriteQuery()) { try { this.query = query.rewrite(indexReader); } catch (IOException e) { throw new SearchEngineException("Failed to rewrite query [" + query + "] for highlighter", e); } } clear(); } public SearchEngineHighlighter clear() { analyzer = analyzerManager.getDefaultAnalyzer(); maxNumFragments = -1; separator = null; maxBytesToAnalyze = -1; return this; } public SearchEngineHighlighter setMaxNumFragments(int maxNumFragments) throws SearchEngineException { this.maxNumFragments = maxNumFragments; return this; } public SearchEngineHighlighter setMaxBytesToAnalyze(int maxBytesToAnalyze) throws SearchEngineException { this.maxBytesToAnalyze = maxBytesToAnalyze; return this; } public SearchEngineHighlighter setAnalyzer(String analyzerName) throws SearchEngineException { this.analyzer = analyzerManager.getAnalyzerMustExist(analyzerName); return this; } public SearchEngineHighlighter setAnalyzer(Resource resource) throws SearchEngineException { this.analyzer = analyzerManager.getAnalyzerByResource(resource); return this; } public SearchEngineHighlighter setHighlighter(String highlighterName) throws SearchEngineException { this.highlighterSettings = highlighterManager.getHighlighterSettingsMustExists(highlighterName); return this; } public SearchEngineHighlighter setSeparator(String separator) throws SearchEngineException { this.separator = separator; return this; } public SearchEngineHighlighter setTextTokenizer(CompassHighlighter.TextTokenizer textTokenizer) throws SearchEngineException { this.textTokenizer = textTokenizer; return this; } public String fragment(Resource resource, String propertyName) throws SearchEngineException { return fragment(resource, propertyName, getTextFromResource(resource, propertyName)); } public String fragment(Resource resource, String propertyName, String text) throws SearchEngineException { Highlighter highlighter = createHighlighter(propertyName); TokenStream tokenStream = createTokenStream(resource, propertyName, text); try { return highlighter.getBestFragment(tokenStream, text); } catch (IOException e) { throw new SearchEngineException("Failed to highlight fragments for alias [" + resource.getAlias() + "] and property [" + propertyName + "]"); } } public String[] fragments(Resource resource, String propertyName) throws SearchEngineException { return fragments(resource, propertyName, getTextFromResource(resource, propertyName)); } public String[] fragments(Resource resource, String propertyName, String text) throws SearchEngineException { Highlighter highlighter = createHighlighter(propertyName); TokenStream tokenStream = createTokenStream(resource, propertyName, text); try { return highlighter.getBestFragments(tokenStream, text, getMaxNumFragments()); } catch (IOException e) { throw new SearchEngineException("Failed to highlight fragments for alias [" + resource.getAlias() + "] and property [" + propertyName + "]"); } } public String fragmentsWithSeparator(Resource resource, String propertyName) throws SearchEngineException { return fragmentsWithSeparator(resource, propertyName, getTextFromResource(resource, propertyName)); } public String fragmentsWithSeparator(Resource resource, String propertyName, String text) throws SearchEngineException { Highlighter highlighter = createHighlighter(propertyName); TokenStream tokenStream = createTokenStream(resource, propertyName, text); try { String actualSeparator = getActualSeparator(); return highlighter.getBestFragments(tokenStream, text, getMaxNumFragments(), actualSeparator); } catch (IOException e) { throw new SearchEngineException("Failed to highlight fragments for alias [" + resource.getAlias() + "] and property [" + propertyName + "]"); } } public String[] multiValueFragment(Resource resource, String propertyName) throws SearchEngineException { return multiValueFragment(resource, propertyName, getTextsFromResource(resource, propertyName)); } public String[] multiValueFragment(Resource resource, String propertyName, String[] texts) throws SearchEngineException { List fragmentList = new ArrayList(); Highlighter highlighter = createHighlighter(propertyName); for (int i = 0; i < texts.length; i++) { String text = texts[i]; if (text != null && text.length() > 0) { //TokenStream tokenStream = createTokenStream(resource, propertyName, text); // We have to re-analyze one field value at a time TokenStream tokenStream = createTokenStreamFromAnalyzer(propertyName, text); try { String fragment = highlighter.getBestFragment(tokenStream, text); if (fragment != null && fragment.length() > 0) { fragmentList.add(fragment); } } catch (IOException e) { throw new SearchEngineException("Failed to highlight fragments for alias [" + resource.getAlias() + "] and property [" + propertyName + "]"); } } } return (String[]) fragmentList.toArray(new String[fragmentList.size()]); } public String multiValueFragmentWithSeparator(Resource resource, String propertyName) throws SearchEngineException { return multiValueFragmentWithSeparator(resource, propertyName, getTextsFromResource(resource, propertyName)); } public String multiValueFragmentWithSeparator(Resource resource, String propertyName, String[] texts) throws SearchEngineException { String[] fragments = multiValueFragment(resource, propertyName, texts); String actualSeparator = getActualSeparator(); StringBuffer fragment = new StringBuffer(); if (fragments.length > 0) { for (int i = 0; i < (fragments.length - 1); i++) { fragment.append(fragments[i]); fragment.append(actualSeparator); } fragment.append(fragments[fragments.length - 1]); } return fragment.toString(); } protected TokenStream createTokenStream(Resource resource, String propertyName, String text) throws SearchEngineException { CompassHighlighter.TextTokenizer actualTextTokenizer = highlighterSettings.getTextTokenizer(); if (textTokenizer != null) { actualTextTokenizer = textTokenizer; } if (actualTextTokenizer == CompassHighlighter.TextTokenizer.AUTO) { TokenStream tokenStream = createTokenStreamFromTermPositions(resource, propertyName); if (tokenStream == null) { tokenStream = createTokenStreamFromAnalyzer(propertyName, text); } return tokenStream; } else if (actualTextTokenizer == CompassHighlighter.TextTokenizer.ANALYZER) { return createTokenStreamFromAnalyzer(propertyName, text); } else if (actualTextTokenizer == CompassHighlighter.TextTokenizer.TERM_VECTOR) { TokenStream tokenStream = createTokenStreamFromTermPositions(resource, propertyName); if (tokenStream == null) { throw new SearchEngineException( "Highlighter configured/set to use term vector, but no term vector is available"); } return tokenStream; } throw new SearchEngineException("No handling for text tokenizer [" + actualTextTokenizer + "]"); } protected TokenStream createTokenStreamFromAnalyzer(String propertyName, String text) { TokenStream tokenStream = analyzer.tokenStream(propertyName, new StringReader(text)); if (tokenStream == null) { tokenStream = new TokenOrderingFilter(tokenStream, 10); } return tokenStream; } protected TokenStream createTokenStreamFromTermPositions(Resource resource, String propertyName) throws SearchEngineException { int docId = ((LuceneResource) resource).getDocNum(); TermFreqVector tfv; try { tfv = indexReader.getTermFreqVector(docId, propertyName); } catch (IOException e) { throw new SearchEngineException("Failed to read term vector info", e); } if (tfv != null) { if (tfv instanceof TermPositionVector) { return TokenSources.getTokenStream((TermPositionVector) tfv); } } return null; } protected Highlighter createHighlighter(String propertyName) throws SearchEngineException { Highlighter highlighter = new Highlighter(highlighterSettings.getFormatter(), highlighterSettings.getEncoder(), createScorer(propertyName)); Fragmenter f = highlighterSettings.getFragmenter(); highlighter.setTextFragmenter(f); if (maxBytesToAnalyze == -1) { highlighter.setMaxDocBytesToAnalyze(highlighterSettings.getMaxBytesToAnalyze()); } else { highlighter.setMaxDocBytesToAnalyze(maxBytesToAnalyze); } return highlighter; } protected Scorer createScorer(String propertyName) throws SearchEngineException { if (highlighterSettings.isComputeIdf()) { if (propertyName == null) { throw new SearchEngineException("When using a formatter that requires idf or setting the [" + LuceneEnvironment.Highlighter.COMPUTE_IDF + "] setting, a resource property name must be provided"); } return new QueryScorer(query, indexReader, propertyName); } return new QueryScorer(query); } private String getTextFromResource(Resource resource, String propertyName) { String text = resource.getValue(propertyName); if (text == null) { throw new SearchEngineException("No text is stored for property [" + propertyName + "] and alias [" + resource.getAlias() + "]"); } return text; } private String[] getTextsFromResource(Resource resource, String propertyName) { String[] texts = resource.getValues(propertyName); if (texts == null || texts.length == 0) { throw new SearchEngineException("No texts are stored for property [" + propertyName + "] and alias [" + resource.getAlias() + "]"); } return texts; } private int getMaxNumFragments() { if (maxNumFragments == -1) { return highlighterSettings.getMaxNumFragments(); } return maxNumFragments; } private String getActualSeparator() { String actualSeparator = separator; if (actualSeparator == null) { actualSeparator = highlighterSettings.getSeparator(); } return actualSeparator; } public void closeDelegate() throws SearchEngineException { close(true); } public void close() throws SearchEngineException { close(false); } private void close(boolean removeDelegate) throws SearchEngineException { if (closed) { return; } closed = true; } }