/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.fetch.subphase.highlight;

import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner;
import org.apache.lucene.search.vectorhighlight.CustomFieldQuery;
import org.apache.lucene.search.vectorhighlight.FieldFragList;
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.search.vectorhighlight.FragListBuilder;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner;
import org.apache.lucene.search.vectorhighlight.SimpleFieldFragList;
import org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder;
import org.apache.lucene.search.vectorhighlight.SingleFragListBuilder;

import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field;
import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.FieldOptions;
import org.elasticsearch.search.internal.SearchContext;

import java.text.BreakIterator;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
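
/**
 * {@link Highlighter} implementation based on Lucene's
 * {@code org.apache.lucene.search.vectorhighlight.FastVectorHighlighter}. It can only highlight
 * fields that were indexed with term vectors including positions and offsets.
 */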
public class FastVectorHighlighter implements Highlighter {
private static final BoundaryScanner DEFAULT_SIMPLE_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
private static final BoundaryScanner DEFAULT_SENTENCE_BOUNDARY_SCANNER =
new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(Locale.ROOT));
private static final BoundaryScanner DEFAULT_WORD_BOUNDARY_SCANNER =
new BreakIteratorBoundaryScanner(BreakIterator.getWordInstance(Locale.ROOT));
public static final Setting<Boolean> SETTING_TV_HIGHLIGHT_MULTI_VALUE =
Setting.boolSetting("search.highlight.term_vector_multi_value", true, Setting.Property.NodeScope);
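    // key under which the per-hit highlighter state is stored in the hit context cache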
private static final String CACHE_KEY = "highlight-fsv";
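    // whether multi valued fields are highlighted one value at a time (see setDiscreteMultiValueHighlighting below)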
private final Boolean termVectorMultiValue;

    public FastVectorHighlighter(Settings settings) {
this.termVectorMultiValue = SETTING_TV_HIGHLIGHT_MULTI_VALUE.get(settings);
}

    @Override
public HighlightField highlight(HighlighterContext highlighterContext) {
SearchContextHighlight.Field field = highlighterContext.field;
SearchContext context = highlighterContext.context;
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
FieldMapper mapper = highlighterContext.mapper;
if (canHighlight(mapper) == false) {
            throw new IllegalArgumentException("the field [" + highlighterContext.fieldName +
                    "] should be indexed with term vectors with positions and offsets to be used with the fast vector highlighter");
}
Encoder encoder = field.fieldOptions().encoder().equals("html") ?
HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
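        // highlighter state is cached in the hit context so that it can be reused across fields and hits of the same fetch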
if (!hitContext.cache().containsKey(CACHE_KEY)) {
hitContext.cache().put(CACHE_KEY, new HighlighterEntry());
}
HighlighterEntry cache = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);
try {
FieldQuery fieldQuery;
if (field.fieldOptions().requireFieldMatch()) {
if (cache.fieldMatchFieldQuery == null) {
                    /*
                     * we use the top level reader to rewrite the query against all readers,
                     * which also lets us cache it across hits (and across readers...)
                     */
cache.fieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query,
hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
}
fieldQuery = cache.fieldMatchFieldQuery;
} else {
if (cache.noFieldMatchFieldQuery == null) {
                    /*
                     * we use the top level reader to rewrite the query against all readers,
                     * which also lets us cache it across hits (and across readers...)
                     */
cache.noFieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query,
hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
}
fieldQuery = cache.noFieldMatchFieldQuery;
}
MapperHighlightEntry entry = cache.mappers.get(mapper);
if (entry == null) {
FragListBuilder fragListBuilder;
BaseFragmentsBuilder fragmentsBuilder;
final BoundaryScanner boundaryScanner = getBoundaryScanner(field);
boolean forceSource = context.highlight().forceSource(field);
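                // number_of_fragments == 0 means the whole field value is returned as a single fragment;
                // the Source* fragments builders are only needed when the value must be loaded from _source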
if (field.fieldOptions().numberOfFragments() == 0) {
fragListBuilder = new SingleFragListBuilder();
if (!forceSource && mapper.fieldType().stored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context,
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
} else {
fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ?
new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset());
if (field.fieldOptions().scoreOrdered()) {
if (!forceSource && mapper.fieldType().stored()) {
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(mapper, context,
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
} else {
if (!forceSource && mapper.fieldType().stored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder =
new SourceSimpleFragmentsBuilder(mapper, context, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
}
}
}
fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
entry = new MapperHighlightEntry();
entry.fragListBuilder = fragListBuilder;
entry.fragmentsBuilder = fragmentsBuilder;
if (cache.fvh == null) {
                    // constructor parameters to the FVH are not required since:
                    //  - the first two booleans (phrase and fieldMatch) are set on the CustomFieldQuery instead
                    //  - the fragment builders are passed explicitly to each getBestFragments call
cache.fvh = new org.apache.lucene.search.vectorhighlight.FastVectorHighlighter();
}
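                // thread local read by CustomFieldQuery while flattening the query; controls whether
                // filters are highlighted as well (the highlight_filter option)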
CustomFieldQuery.highlightFilters.set(field.fieldOptions().highlightFilter());
cache.mappers.put(mapper, entry);
}
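            // limit the number of phrases the FVH will extract for the field, as configured via phrase_limit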
cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());
String[] fragments;
            // a HACK to make the highlighter do highlighting even though it is using the SingleFragListBuilder
int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ?
Integer.MAX_VALUE : field.fieldOptions().numberOfFragments();
int fragmentCharSize = field.fieldOptions().numberOfFragments() == 0 ?
Integer.MAX_VALUE : field.fieldOptions().fragmentCharSize();
// we highlight against the low level reader and docId, because if we load source, we want to reuse it if possible
            // Only send matched fields if they were requested, to save time.
if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) {
fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
mapper.fieldType().name(), field.fieldOptions().matchedFields(), fragmentCharSize,
numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), encoder);
} else {
fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
mapper.fieldType().name(), fragmentCharSize, numberOfFragments, entry.fragListBuilder,
entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
}
if (fragments != null && fragments.length > 0) {
return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
}
int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
if (noMatchSize > 0) {
// Essentially we just request that a fragment is built from 0 to noMatchSize using
// the normal fragmentsBuilder
FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/);
fieldFragList.add(0, noMatchSize, Collections.<WeightedPhraseInfo>emptyList());
fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(),
mapper.fieldType().name(), fieldFragList, 1, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), encoder);
if (fragments != null && fragments.length > 0) {
return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
}
}
return null;
} catch (Exception e) {
throw new FetchPhaseExecutionException(context,
"Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
}

    @Override
public boolean canHighlight(FieldMapper fieldMapper) {
return fieldMapper.fieldType().storeTermVectors() && fieldMapper.fieldType().storeTermVectorOffsets()
&& fieldMapper.fieldType().storeTermVectorPositions();
}
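
    /**
     * Picks the {@link BoundaryScanner} matching the field options, falling back to the shared
     * {@code Locale.ROOT} based defaults when nothing custom is configured.
     */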
private static BoundaryScanner getBoundaryScanner(Field field) {
final FieldOptions fieldOptions = field.fieldOptions();
        // a null locale here selects the shared Locale.ROOT based default scanners below
        final Locale boundaryScannerLocale = fieldOptions.boundaryScannerLocale();
final HighlightBuilder.BoundaryScannerType type =
fieldOptions.boundaryScannerType() != null ? fieldOptions.boundaryScannerType() :
HighlightBuilder.BoundaryScannerType.CHARS;
switch(type) {
case SENTENCE:
if (boundaryScannerLocale != null) {
return new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(boundaryScannerLocale));
}
return DEFAULT_SENTENCE_BOUNDARY_SCANNER;
case WORD:
if (boundaryScannerLocale != null) {
return new BreakIteratorBoundaryScanner(BreakIterator.getWordInstance(boundaryScannerLocale));
}
return DEFAULT_WORD_BOUNDARY_SCANNER;
case CHARS:
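            // identity comparison against the shared defaults is enough here: a custom boundary
            // characters array (even with default contents) just creates a dedicated scanner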
if (fieldOptions.boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN
|| fieldOptions.boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) {
return new SimpleBoundaryScanner(fieldOptions.boundaryMaxScan(), fieldOptions.boundaryChars());
}
return DEFAULT_SIMPLE_BOUNDARY_SCANNER;
default:
throw new IllegalArgumentException("Invalid boundary scanner type: " + type.toString());
}
}
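
    /** Cached per {@link FieldMapper}: the frag list and fragments builders configured for that field. */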
    private static class MapperHighlightEntry {
public FragListBuilder fragListBuilder;
public FragmentsBuilder fragmentsBuilder;
}
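
    /** Per hit-context cache: the FVH instance, the rewritten field queries and the per mapper builders. */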
    private static class HighlighterEntry {
public org.apache.lucene.search.vectorhighlight.FastVectorHighlighter fvh;
public FieldQuery noFieldMatchFieldQuery;
public FieldQuery fieldMatchFieldQuery;
public Map<FieldMapper, MapperHighlightEntry> mappers = new HashMap<>();
}
}