/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.highlight;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.vectorhighlight.*;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.lucene.document.SingleFieldSelector;
import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import org.elasticsearch.common.lucene.search.vectorhighlight.SimpleBoundaryScanner2;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.highlight.vectorhighlight.SourceScoreOrderFragmentsBuilder;
import org.elasticsearch.search.highlight.vectorhighlight.SourceSimpleFragmentsBuilder;
import org.elasticsearch.search.internal.InternalSearchHit;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;

import java.util.*;

import static com.google.common.collect.Maps.newHashMap;

/**
 *
 */
public class HighlightPhase extends AbstractComponent implements FetchSubPhase {

    public static class Encoders {
        public static Encoder DEFAULT = new DefaultEncoder();
        public static Encoder HTML = new SimpleHTMLEncoder();
    }

    private final boolean termVectorMultiValue;

    @Inject
    public HighlightPhase(Settings settings) {
        super(settings);
        this.termVectorMultiValue = componentSettings.getAsBoolean("term_vector_multi_value", true);
    }
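
    // FetchSubPhase wiring: the "highlight" element of the search request is parsed by
    // HighlighterParseElement, and highlighting runs once per hit (hitExecute) rather than
    // over all hits at once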
    @Override
    public Map<String, ? extends SearchParseElement> parseElements() {
        return ImmutableMap.of("highlight", new HighlighterParseElement());
    }

    @Override
    public boolean hitsExecutionNeeded(SearchContext context) {
        return false;
    }

    @Override
    public void hitsExecute(SearchContext context, InternalSearchHit[] hits) throws ElasticSearchException {
    }

    @Override
    public boolean hitExecutionNeeded(SearchContext context) {
        return context.highlight() != null;
    }

    @Override
    public void hitExecute(SearchContext context, HitContext hitContext) throws ElasticSearchException {
        // we use a cache to cache heavy things, mainly the rewrite in FieldQuery for FVH
        HighlighterEntry cache = (HighlighterEntry) hitContext.cache().get("highlight");
        if (cache == null) {
            cache = new HighlighterEntry();
            hitContext.cache().put("highlight", cache);
        }

        DocumentMapper documentMapper = context.mapperService().documentMapper(hitContext.hit().type());

        Map<String, HighlightField> highlightFields = newHashMap();
        for (SearchContextHighlight.Field field : context.highlight().fields()) {
            Encoder encoder;
            if (field.encoder().equals("html")) {
                encoder = Encoders.HTML;
            } else {
                encoder = Encoders.DEFAULT;
            }

            FieldMapper mapper = documentMapper.mappers().smartNameFieldMapper(field.field());
            if (mapper == null) {
                MapperService.SmartNameFieldMappers fullMapper = context.mapperService().smartName(field.field());
                if (fullMapper == null || !fullMapper.hasDocMapper()) {
                    // the field has no mapping at all, skip it
                    continue;
                }
                if (!fullMapper.docMapper().type().equals(hitContext.hit().type())) {
                    continue;
                }
                mapper = fullMapper.mapper();
                if (mapper == null) {
                    continue;
                }
            }

            // if we can do highlighting using Term Vectors, use FastVectorHighlighter, otherwise, use the
            // slower plain highlighter
            if (mapper.termVector() != Field.TermVector.WITH_POSITIONS_OFFSETS) {
                MapperHighlightEntry entry = cache.mappers.get(mapper);
                if (entry == null) {
                    // Don't use context.query() since it might be rewritten, and we need to pass
                    // the non-rewritten queries to let the highlighter handle MultiTerm ones.
                    // QueryScorer uses WeightedSpanTermExtractor to extract terms, but we can't
                    // really plug into it, so we hack here (and really only support top level queries)
                    Query query = context.parsedQuery().query();
                    while (true) {
                        boolean extracted = false;
                        if (query instanceof FunctionScoreQuery) {
                            query = ((FunctionScoreQuery) query).getSubQuery();
                            extracted = true;
                        } else if (query instanceof FiltersFunctionScoreQuery) {
                            query = ((FiltersFunctionScoreQuery) query).getSubQuery();
                            extracted = true;
                        } else if (query instanceof ConstantScoreQuery) {
                            ConstantScoreQuery q = (ConstantScoreQuery) query;
                            if (q.getQuery() != null) {
                                query = q.getQuery();
                                extracted = true;
                            }
                        } else if (query instanceof FilteredQuery) {
                            query = ((FilteredQuery) query).getQuery();
                            extracted = true;
                        }
                        if (!extracted) {
                            break;
                        }
                    }
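
                    // build the plain highlighter for this field: a QueryScorer over the unwrapped
                    // query, a fragmenter sized from the request, and a simple HTML formatter using
                    // the first pre/post tag pair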
                    QueryScorer queryScorer = new QueryScorer(query, field.requireFieldMatch() ? mapper.names().indexName() : null);
                    queryScorer.setExpandMultiTermQuery(true);

                    Fragmenter fragmenter;
                    if (field.numberOfFragments() == 0) {
                        fragmenter = new NullFragmenter();
                    } else {
                        fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize());
                    }
                    Formatter formatter = new SimpleHTMLFormatter(field.preTags()[0], field.postTags()[0]);

                    entry = new MapperHighlightEntry();
                    entry.highlighter = new Highlighter(formatter, encoder, queryScorer);
                    entry.highlighter.setTextFragmenter(fragmenter);
                    // always highlight across all data
                    entry.highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

                    cache.mappers.put(mapper, entry);
                }

                List<Object> textsToHighlight;
                if (mapper.stored()) {
                    try {
                        Document doc = hitContext.reader().document(hitContext.docId(), new SingleFieldSelector(mapper.names().indexName()));
                        textsToHighlight = new ArrayList<Object>(doc.getFields().size());
                        for (Fieldable docField : doc.getFields()) {
                            if (docField.stringValue() != null) {
                                textsToHighlight.add(docField.stringValue());
                            }
                        }
                    } catch (Exception e) {
                        throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
                    }
                } else {
                    SearchLookup lookup = context.lookup();
                    lookup.setNextReader(hitContext.reader());
                    lookup.setNextDocId(hitContext.docId());
                    textsToHighlight = lookup.source().extractRawValues(mapper.names().sourcePath());
                }

                // a HACK to make highlighter do highlighting, even though it's using the single frag list builder
                int numberOfFragments = field.numberOfFragments() == 0 ? 1 : field.numberOfFragments();
                ArrayList<TextFragment> fragsList = new ArrayList<TextFragment>();
                try {
                    for (Object textToHighlight : textsToHighlight) {
                        String text = textToHighlight.toString();
                        Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
                        TokenStream tokenStream = analyzer.reusableTokenStream(mapper.names().indexName(), new FastStringReader(text));
                        TextFragment[] bestTextFragments = entry.highlighter.getBestTextFragments(tokenStream, text, false, numberOfFragments);
                        for (TextFragment bestTextFragment : bestTextFragments) {
                            if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
                                fragsList.add(bestTextFragment);
                            }
                        }
                    }
                } catch (Exception e) {
                    throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
                }
                if (field.scoreOrdered()) {
                    Collections.sort(fragsList, new Comparator<TextFragment>() {
                        public int compare(TextFragment o1, TextFragment o2) {
                            // descending score order; Float.compare keeps the comparator contract
                            // (rounding the score difference, as before, did not)
                            return Float.compare(o2.getScore(), o1.getScore());
                        }
                    });
                }
                String[] fragments = null;
                // number_of_fragments is set to 0 but we have a multivalued field
                if (field.numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) {
                    fragments = new String[fragsList.size()];
                    for (int i = 0; i < fragsList.size(); i++) {
                        fragments[i] = fragsList.get(i).toString();
                    }
                } else {
                    // refine numberOfFragments if needed
                    numberOfFragments = fragsList.size() < numberOfFragments ? fragsList.size() : numberOfFragments;
                    fragments = new String[numberOfFragments];
                    for (int i = 0; i < fragments.length; i++) {
                        fragments[i] = fragsList.get(i).toString();
                    }
                }

                if (fragments != null && fragments.length > 0) {
                    HighlightField highlightField = new HighlightField(field.field(), StringText.convertFromStringArray(fragments));
                    highlightFields.put(highlightField.name(), highlightField);
                }
            } else {
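                // FastVectorHighlighter path: the field was indexed with term vectors (positions
                // and offsets), so fragments can be built from the index without re-analyzing text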
                try {
                    MapperHighlightEntry entry = cache.mappers.get(mapper);
                    if (entry == null) {
                        FragListBuilder fragListBuilder;
                        AbstractFragmentsBuilder fragmentsBuilder;

                        BoundaryScanner boundaryScanner = SimpleBoundaryScanner2.DEFAULT;
                        if (field.boundaryMaxScan() != SimpleBoundaryScanner2.DEFAULT_MAX_SCAN || field.boundaryChars() != SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS) {
                            boundaryScanner = new SimpleBoundaryScanner2(field.boundaryMaxScan(), field.boundaryChars());
                        }

                        if (field.numberOfFragments() == 0) {
                            fragListBuilder = new SingleFragListBuilder();
                            if (mapper.stored()) {
                                fragmentsBuilder = new XSimpleFragmentsBuilder(field.preTags(), field.postTags(), boundaryScanner);
                            } else {
                                fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner);
                            }
                        } else {
                            if (field.fragmentOffset() == -1) {
                                fragListBuilder = new SimpleFragListBuilder();
                            } else {
                                fragListBuilder = new SimpleFragListBuilder(field.fragmentOffset());
                            }
                            if (field.scoreOrdered()) {
                                if (mapper.stored()) {
                                    fragmentsBuilder = new XScoreOrderFragmentsBuilder(field.preTags(), field.postTags(), boundaryScanner);
                                } else {
                                    fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner);
                                }
                            } else {
                                if (mapper.stored()) {
                                    fragmentsBuilder = new XSimpleFragmentsBuilder(field.preTags(), field.postTags(), boundaryScanner);
                                } else {
                                    fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner);
                                }
                            }
                        }
                        fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
                        entry = new MapperHighlightEntry();
                        entry.fragListBuilder = fragListBuilder;
                        entry.fragmentsBuilder = fragmentsBuilder;
                        if (cache.fvh == null) {
                            // constructor parameters to FVH are not required since:
                            // the first two booleans are not relevant, they are set on the CustomFieldQuery (phrase and fieldMatch),
                            // and the fragment builders are passed explicitly per call
                            cache.fvh = new FastVectorHighlighter();
                        }
                        cache.mappers.put(mapper, entry);
                    }

                    // resolve the FieldQuery outside the entry check above; a cached entry would
                    // otherwise leave fieldQuery null on subsequent hits
                    CustomFieldQuery.highlightFilters.set(field.highlightFilter());
                    FieldQuery fieldQuery;
                    if (field.requireFieldMatch()) {
                        if (cache.fieldMatchFieldQuery == null) {
                            // we use the top level reader to rewrite the query against all readers,
                            // while caching it across hits (and across readers...)
                            cache.fieldMatchFieldQuery = new CustomFieldQuery(context.parsedQuery().query(), hitContext.topLevelReader(), true, field.requireFieldMatch());
                        }
                        fieldQuery = cache.fieldMatchFieldQuery;
                    } else {
                        if (cache.noFieldMatchFieldQuery == null) {
                            // we use the top level reader to rewrite the query against all readers,
                            // while caching it across hits (and across readers...)
                            cache.noFieldMatchFieldQuery = new CustomFieldQuery(context.parsedQuery().query(), hitContext.topLevelReader(), true, field.requireFieldMatch());
                        }
                        fieldQuery = cache.noFieldMatchFieldQuery;
                    }
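
                    // run FVH over the term vectors of this document; the per-field builders decide
                    // whether fragments come from stored fields or are re-read from _source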
                    String[] fragments;

                    // a HACK to make highlighter do highlighting, even though it's using the single frag list builder
                    int numberOfFragments = field.numberOfFragments() == 0 ? 1 : field.numberOfFragments();
                    // we highlight against the low level reader and docId, because if we load source, we want to reuse it if possible
                    fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.names().indexName(), field.fragmentCharSize(), numberOfFragments,
                            entry.fragListBuilder, entry.fragmentsBuilder, field.preTags(), field.postTags(), encoder);

                    if (fragments != null && fragments.length > 0) {
                        HighlightField highlightField = new HighlightField(field.field(), StringText.convertFromStringArray(fragments));
                        highlightFields.put(highlightField.name(), highlightField);
                    }
                } catch (Exception e) {
                    throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
                }
            }
        }

        hitContext.hit().highlightFields(highlightFields);
    }

    static class MapperHighlightEntry {
        public FragListBuilder fragListBuilder;
        public FragmentsBuilder fragmentsBuilder;
        public Highlighter highlighter;
    }

    static class HighlighterEntry {
        public FastVectorHighlighter fvh;
        public FieldQuery noFieldMatchFieldQuery;
        public FieldQuery fieldMatchFieldQuery;
        public Map<FieldMapper, MapperHighlightEntry> mappers = Maps.newHashMap();
    }
}