/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.fetch.subphase.highlight;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.highlight.Snippet;
import org.apache.lucene.search.uhighlight.BoundedBreakIteratorScanner;
import org.apache.lucene.search.uhighlight.CustomPassageFormatter;
import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
import static org.elasticsearch.search.fetch.subphase.highlight.PostingsHighlighter.filterSnippets;
import static org.elasticsearch.search.fetch.subphase.highlight.PostingsHighlighter.mergeFieldValues;
public class UnifiedHighlighter implements Highlighter {
private static final String CACHE_KEY = "highlight-unified";
@Override
public boolean canHighlight(FieldMapper fieldMapper) {
return true;
}
@Override
public HighlightField highlight(HighlighterContext highlighterContext) {
FieldMapper fieldMapper = highlighterContext.mapper;
SearchContextHighlight.Field field = highlighterContext.field;
SearchContext context = highlighterContext.context;
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
if (!hitContext.cache().containsKey(CACHE_KEY)) {
hitContext.cache().put(CACHE_KEY, new HighlighterEntry());
}
HighlighterEntry highlighterEntry = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);
MapperHighlighterEntry mapperHighlighterEntry = highlighterEntry.mappers.get(fieldMapper);
if (mapperHighlighterEntry == null) {
Encoder encoder = field.fieldOptions().encoder().equals("html") ?
HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
CustomPassageFormatter passageFormatter =
new CustomPassageFormatter(field.fieldOptions().preTags()[0],
field.fieldOptions().postTags()[0], encoder);
mapperHighlighterEntry = new MapperHighlighterEntry(passageFormatter);
}
List<Snippet> snippets = new ArrayList<>();
int numberOfFragments;
try {
Analyzer analyzer =
context.mapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
List<Object> fieldValues = HighlightUtils.loadFieldValues(field, fieldMapper, context, hitContext);
fieldValues = fieldValues.stream().map(obj -> {
if (obj instanceof BytesRef) {
return fieldMapper.fieldType().valueForDisplay(obj).toString();
} else {
return obj;
}
}).collect(Collectors.toList());
IndexSearcher searcher = new IndexSearcher(hitContext.reader());
CustomUnifiedHighlighter highlighter;
if (field.fieldOptions().numberOfFragments() == 0) {
// we use a control char to separate values, which is the only char that the custom break iterator
// breaks the text on, so we don't lose the distinction between the different values of a field and we
// get back a snippet per value
String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator breakIterator =
new org.apache.lucene.search.postingshighlight
.CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
highlighter =
new CustomUnifiedHighlighter(searcher, analyzer, mapperHighlighterEntry.passageFormatter,
field.fieldOptions().boundaryScannerLocale(), breakIterator, fieldValue,
field.fieldOptions().noMatchSize());
numberOfFragments = fieldValues.size(); // we are highlighting the whole content, one snippet per value
} else {
//using paragraph separator we make sure that each field value holds a discrete passage for highlighting
String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
BreakIterator bi = getBreakIterator(field);
highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
mapperHighlighterEntry.passageFormatter, field.fieldOptions().boundaryScannerLocale(), bi,
fieldValue, field.fieldOptions().noMatchSize());
numberOfFragments = field.fieldOptions().numberOfFragments();
}
if (field.fieldOptions().requireFieldMatch()) {
final String fieldName = highlighterContext.fieldName;
highlighter.setFieldMatcher((name) -> fieldName.equals(name));
} else {
highlighter.setFieldMatcher((name) -> true);
}
Snippet[] fieldSnippets = highlighter.highlightField(highlighterContext.fieldName,
highlighterContext.query, hitContext.docId(), numberOfFragments);
for (Snippet fieldSnippet : fieldSnippets) {
if (Strings.hasText(fieldSnippet.getText())) {
snippets.add(fieldSnippet);
}
}
} catch (IOException e) {
throw new FetchPhaseExecutionException(context,
"Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments());
if (field.fieldOptions().scoreOrdered()) {
//let's sort the snippets by score if needed
CollectionUtil.introSort(snippets, (o1, o2) -> Double.compare(o2.getScore(), o1.getScore()));
}
String[] fragments = new String[snippets.size()];
for (int i = 0; i < fragments.length; i++) {
fragments[i] = snippets.get(i).getText();
}
if (fragments.length > 0) {
return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
}
return null;
}
private BreakIterator getBreakIterator(SearchContextHighlight.Field field) {
final SearchContextHighlight.FieldOptions fieldOptions = field.fieldOptions();
final Locale locale =
fieldOptions.boundaryScannerLocale() != null ? fieldOptions.boundaryScannerLocale() :
Locale.ROOT;
final HighlightBuilder.BoundaryScannerType type =
fieldOptions.boundaryScannerType() != null ? fieldOptions.boundaryScannerType() :
HighlightBuilder.BoundaryScannerType.SENTENCE;
int maxLen = fieldOptions.fragmentCharSize();
switch (type) {
case SENTENCE:
if (maxLen > 0) {
return BoundedBreakIteratorScanner.getSentence(locale, maxLen);
}
return BreakIterator.getSentenceInstance(locale);
case WORD:
// ignore maxLen
return BreakIterator.getWordInstance(locale);
default:
throw new IllegalArgumentException("Invalid boundary scanner type: " + type.toString());
}
}
private static class HighlighterEntry {
Map<FieldMapper, MapperHighlighterEntry> mappers = new HashMap<>();
}
private static class MapperHighlighterEntry {
final CustomPassageFormatter passageFormatter;
private MapperHighlighterEntry(CustomPassageFormatter passageFormatter) {
this.passageFormatter = passageFormatter;
}
}
}