/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.percolator;

import static org.elasticsearch.common.util.CollectionUtils.eagerTransform;
import static org.elasticsearch.index.mapper.SourceToParse.source;
import static org.elasticsearch.percolator.QueryCollector.count;
import static org.elasticsearch.percolator.QueryCollector.match;
import static org.elasticsearch.percolator.QueryCollector.matchAndScore;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.memory.ExtendedMemoryIndex;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.action.percolate.PercolateResponse;
import org.elasticsearch.action.percolate.PercolateShardRequest;
import org.elasticsearch.action.percolate.PercolateShardResponse;
import org.elasticsearch.cache.recycler.PageCacheRecycler;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.common.HasContextAndHeaders;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.mapper.DocumentMapperForType;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.percolator.stats.ShardPercolateService;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.percolator.QueryCollector.Count;
import org.elasticsearch.percolator.QueryCollector.Match;
import org.elasticsearch.percolator.QueryCollector.MatchAndScore;
import org.elasticsearch.percolator.QueryCollector.MatchAndSort;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.aggregations.AggregationPhase;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregation.ReduceContext;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.pipeline.SiblingPipelineAggregator;
import org.elasticsearch.search.highlight.HighlightField;
import org.elasticsearch.search.highlight.HighlightPhase;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.sort.SortParseElement;
import com.carrotsearch.hppc.IntObjectHashMap;
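
/**
 * Shard-level percolation service: indexes the document(s) to percolate into an in-memory
 * Lucene index and runs the shard's registered percolator queries against it, selecting a
 * {@link PercolatorType} strategy (count, match, scoring, sorted top matches) per request.
 * <p>
 * A typical client-side invocation, as a rough sketch only (the exact builder methods depend
 * on the client version in use):
 * <pre>{@code
 * PercolateResponse response = client.preparePercolate()
 *         .setIndices("my-index")
 *         .setDocumentType("my-type")
 *         .setSource(docAndOptionsSource)
 *         .get();
 * }</pre>
 */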
public class PercolatorService extends AbstractComponent {
    public static final float NO_SCORE = Float.NEGATIVE_INFINITY;
    public static final String TYPE_NAME = ".percolator";
private final IndexNameExpressionResolver indexNameExpressionResolver;
private final IndicesService indicesService;
private final IntObjectHashMap<PercolatorType> percolatorTypes;
private final PageCacheRecycler pageCacheRecycler;
private final BigArrays bigArrays;
private final ClusterService clusterService;
private final PercolatorIndex single;
private final PercolatorIndex multi;
private final HighlightPhase highlightPhase;
private final AggregationPhase aggregationPhase;
private final SortParseElement sortParseElement;
private final ScriptService scriptService;
private final MappingUpdatedAction mappingUpdatedAction;
private final CloseableThreadLocal<MemoryIndex> cache;
private final ParseFieldMatcher parseFieldMatcher;
@Inject
public PercolatorService(Settings settings, IndexNameExpressionResolver indexNameExpressionResolver, IndicesService indicesService,
PageCacheRecycler pageCacheRecycler, BigArrays bigArrays,
HighlightPhase highlightPhase, ClusterService clusterService,
AggregationPhase aggregationPhase, ScriptService scriptService,
MappingUpdatedAction mappingUpdatedAction) {
super(settings);
this.indexNameExpressionResolver = indexNameExpressionResolver;
this.parseFieldMatcher = new ParseFieldMatcher(settings);
this.indicesService = indicesService;
this.pageCacheRecycler = pageCacheRecycler;
this.bigArrays = bigArrays;
this.clusterService = clusterService;
this.highlightPhase = highlightPhase;
this.aggregationPhase = aggregationPhase;
this.scriptService = scriptService;
this.mappingUpdatedAction = mappingUpdatedAction;
this.sortParseElement = new SortParseElement();
final long maxReuseBytes = settings.getAsBytesSize("indices.memory.memory_index.size_per_thread", new ByteSizeValue(1, ByteSizeUnit.MB)).bytes();
cache = new CloseableThreadLocal<MemoryIndex>() {
@Override
protected MemoryIndex initialValue() {
// TODO: should we expose payloads as an option? should offsets be turned on always?
return new ExtendedMemoryIndex(true, false, maxReuseBytes);
}
};
single = new SingleDocumentPercolatorIndex(cache);
multi = new MultiDocumentPercolatorIndex(cache);
percolatorTypes = new IntObjectHashMap<>(6);
percolatorTypes.put(countPercolator.id(), countPercolator);
percolatorTypes.put(queryCountPercolator.id(), queryCountPercolator);
percolatorTypes.put(matchPercolator.id(), matchPercolator);
percolatorTypes.put(queryPercolator.id(), queryPercolator);
percolatorTypes.put(scoringPercolator.id(), scoringPercolator);
percolatorTypes.put(topMatchingPercolator.id(), topMatchingPercolator);
}
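
    /**
     * Reduces the shard-level responses into a single result, delegating to the reduce logic
     * of the {@link PercolatorType} identified by {@code percolatorTypeId}.
     */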
public ReduceResult reduce(byte percolatorTypeId, List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
PercolatorType percolatorType = percolatorTypes.get(percolatorTypeId);
return percolatorType.reduce(shardResults, headersContext);
}
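
    /**
     * Executes a percolate request on this shard: parses the request and the document to
     * percolate, indexes that document into an in-memory index, then runs the appropriate
     * {@link PercolatorType} strategy against the shard's registered percolator queries.
     */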
public PercolateShardResponse percolate(PercolateShardRequest request) {
IndexService percolateIndexService = indicesService.indexServiceSafe(request.shardId().getIndex());
IndexShard indexShard = percolateIndexService.shardSafe(request.shardId().id());
indexShard.readAllowed(); // check if we can read the shard...
ShardPercolateService shardPercolateService = indexShard.shardPercolateService();
shardPercolateService.prePercolate();
long startTime = System.nanoTime();
        // TODO: The filteringAliases should be looked up at the coordinating node and serialized with all shard requests,
        // just like it is done in other APIs.
String[] filteringAliases = indexNameExpressionResolver.filteringAliases(
clusterService.state(),
indexShard.shardId().index().name(),
request.indices()
);
Query aliasFilter = percolateIndexService.aliasesService().aliasFilter(filteringAliases);
SearchShardTarget searchShardTarget = new SearchShardTarget(clusterService.localNode().id(), request.shardId().getIndex(), request.shardId().id());
final PercolateContext context = new PercolateContext(
request, searchShardTarget, indexShard, percolateIndexService, pageCacheRecycler, bigArrays, scriptService, aliasFilter, parseFieldMatcher, clusterService.state()
);
        // Some queries (e.g. the function_score query with decay functions) rely on a SearchContext being set:
        // We switch types because this context needs to be in the context of the percolate queries in the shard and
        // not the in-memory percolate doc
String[] previousTypes = context.types();
context.types(new String[]{TYPE_NAME});
SearchContext.setCurrent(context);
try {
ParsedDocument parsedDocument = parseRequest(percolateIndexService, request, context, request.shardId().getIndex());
if (context.percolateQueries().isEmpty()) {
return new PercolateShardResponse(context, request.shardId());
}
if (request.docSource() != null && request.docSource().length() != 0) {
parsedDocument = parseFetchedDoc(context, request.docSource(), percolateIndexService, request.shardId().getIndex(), request.documentType());
} else if (parsedDocument == null) {
throw new IllegalArgumentException("Nothing to percolate");
}
            if ((context.percolateQuery() == null && (context.trackScores() || context.doSort || context.aggregations() != null)) || context.aliasFilter() != null) {
context.percolateQuery(new MatchAllDocsQuery());
}
if (context.doSort && !context.limit) {
throw new IllegalArgumentException("Can't sort if size isn't specified");
}
if (context.highlight() != null && !context.limit) {
throw new IllegalArgumentException("Can't highlight if size isn't specified");
}
if (context.size() < 0) {
context.size(0);
}
            // Parse the source into one MemoryIndex if it is a single document, or index multiple docs if nested
PercolatorIndex percolatorIndex;
boolean isNested = indexShard.mapperService().documentMapper(request.documentType()).hasNestedObjects();
if (parsedDocument.docs().size() > 1) {
assert isNested;
percolatorIndex = multi;
} else {
percolatorIndex = single;
}
PercolatorType action;
if (request.onlyCount()) {
action = context.percolateQuery() != null ? queryCountPercolator : countPercolator;
} else {
if (context.doSort) {
action = topMatchingPercolator;
} else if (context.percolateQuery() != null) {
action = context.trackScores() ? scoringPercolator : queryPercolator;
} else {
action = matchPercolator;
}
}
context.percolatorTypeId = action.id();
percolatorIndex.prepare(context, parsedDocument);
return action.doPercolate(request, context, isNested);
} finally {
context.types(previousTypes);
SearchContext.removeCurrent();
context.close();
shardPercolateService.postPercolate(System.nanoTime() - startTime);
}
}
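
    /**
     * Parses the percolate request body: the inline "doc" to percolate (if any), the optional
     * "query" or "filter", plus "sort", "size", "track_scores", highlighting and aggregation
     * elements. Returns the parsed document, or {@code null} when the request has no source.
     */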
private ParsedDocument parseRequest(IndexService documentIndexService, PercolateShardRequest request, PercolateContext context, String index) {
BytesReference source = request.source();
if (source == null || source.length() == 0) {
return null;
}
// TODO: combine all feature parse elements into one map
Map<String, ? extends SearchParseElement> hlElements = highlightPhase.parseElements();
Map<String, ? extends SearchParseElement> aggregationElements = aggregationPhase.parseElements();
ParsedDocument doc = null;
XContentParser parser = null;
try {
parser = XContentFactory.xContent(source).createParser(source);
String currentFieldName = null;
XContentParser.Token token;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
                    // we need to check for the "doc" field here, so that the next token will be START_OBJECT,
                    // which is where the actual document starts
if ("doc".equals(currentFieldName)) {
if (doc != null) {
throw new ElasticsearchParseException("Either specify doc or get, not both");
}
MapperService mapperService = documentIndexService.mapperService();
DocumentMapperForType docMapper = mapperService.documentMapperWithAutoCreate(request.documentType());
doc = docMapper.getDocumentMapper().parse(source(parser).index(index).type(request.documentType()).flyweight(true));
if (docMapper.getMapping() != null) {
doc.addDynamicMappingsUpdate(docMapper.getMapping());
}
if (doc.dynamicMappingsUpdate() != null) {
mappingUpdatedAction.updateMappingOnMasterSynchronously(request.shardId().getIndex(), request.documentType(), doc.dynamicMappingsUpdate());
}
                    // the document parsing exits the "doc" object, so we need to set the new current field.
currentFieldName = parser.currentName();
}
} else if (token == XContentParser.Token.START_OBJECT) {
SearchParseElement element = hlElements.get(currentFieldName);
if (element == null) {
element = aggregationElements.get(currentFieldName);
}
if ("query".equals(currentFieldName)) {
if (context.percolateQuery() != null) {
throw new ElasticsearchParseException("Either specify query or filter, not both");
}
context.percolateQuery(documentIndexService.queryParserService().parse(parser).query());
} else if ("filter".equals(currentFieldName)) {
if (context.percolateQuery() != null) {
throw new ElasticsearchParseException("Either specify query or filter, not both");
}
ParsedQuery parsedQuery = documentIndexService.queryParserService().parseInnerFilter(parser);
                        if (parsedQuery != null) {
context.percolateQuery(new ConstantScoreQuery(parsedQuery.query()));
}
} else if ("sort".equals(currentFieldName)) {
parseSort(parser, context);
} else if (element != null) {
element.parse(parser, context);
}
} else if (token == XContentParser.Token.START_ARRAY) {
if ("sort".equals(currentFieldName)) {
parseSort(parser, context);
}
} else if (token == null) {
break;
} else if (token.isValue()) {
if ("size".equals(currentFieldName)) {
context.size(parser.intValue());
if (context.size() < 0) {
throw new ElasticsearchParseException("size is set to [{}] and is expected to be higher or equal to 0", context.size());
}
} else if ("sort".equals(currentFieldName)) {
parseSort(parser, context);
} else if ("track_scores".equals(currentFieldName) || "trackScores".equals(currentFieldName)) {
context.trackScores(parser.booleanValue());
}
}
}
// We need to get the actual source from the request body for highlighting, so parse the request body again
// and only get the doc source.
if (context.highlight() != null) {
parser.close();
currentFieldName = null;
parser = XContentFactory.xContent(source).createParser(source);
token = parser.nextToken();
assert token == XContentParser.Token.START_OBJECT;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token == XContentParser.Token.START_OBJECT) {
if ("doc".equals(currentFieldName)) {
BytesStreamOutput bStream = new BytesStreamOutput();
XContentBuilder builder = XContentFactory.contentBuilder(XContentType.SMILE, bStream);
builder.copyCurrentStructure(parser);
builder.close();
doc.setSource(bStream.bytes());
break;
} else {
parser.skipChildren();
}
} else if (token == null) {
break;
}
}
}
} catch (Throwable e) {
throw new ElasticsearchParseException("failed to parse request", e);
} finally {
if (parser != null) {
parser.close();
}
}
return doc;
}
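
    // Only the default relevance sort (_score desc) is supported; any other sort definition
    // results in a parse exception.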
private void parseSort(XContentParser parser, PercolateContext context) throws Exception {
sortParseElement.parse(parser, context);
        // null means default sorting by relevance
if (context.sort() == null) {
context.doSort = true;
} else {
throw new ElasticsearchParseException("Only _score desc is supported");
}
}
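
    /**
     * Parses an existing (fetched) document that should be percolated, keeping its source
     * around when highlighting was requested.
     */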
private ParsedDocument parseFetchedDoc(PercolateContext context, BytesReference fetchedDoc, IndexService documentIndexService, String index, String type) {
ParsedDocument doc = null;
XContentParser parser = null;
try {
parser = XContentFactory.xContent(fetchedDoc).createParser(fetchedDoc);
MapperService mapperService = documentIndexService.mapperService();
DocumentMapperForType docMapper = mapperService.documentMapperWithAutoCreate(type);
doc = docMapper.getDocumentMapper().parse(source(parser).index(index).type(type).flyweight(true));
if (context.highlight() != null) {
doc.setSource(fetchedDoc);
}
} catch (Throwable e) {
throw new ElasticsearchParseException("failed to parse request", e);
} finally {
if (parser != null) {
parser.close();
}
}
if (doc == null) {
throw new ElasticsearchParseException("No doc to percolate in the request");
}
return doc;
}
public void close() {
cache.close();
}
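
    /**
     * Strategy interface for the different percolation variants. Each implementation has a
     * unique id which is serialized with the shard responses so that the coordinating node
     * can select the matching reduce logic.
     */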
interface PercolatorType {
// 0x00 is reserved for empty type.
byte id();
ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext);
PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested);
}
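
    // Counts matches by running every registered percolator query directly against the
    // in-memory index holding the percolated document.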
private final PercolatorType countPercolator = new PercolatorType() {
@Override
public byte id() {
return 0x01;
}
@Override
public ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
long finalCount = 0;
for (PercolateShardResponse shardResponse : shardResults) {
finalCount += shardResponse.count();
}
assert !shardResults.isEmpty();
InternalAggregations reducedAggregations = reduceAggregations(shardResults, headersContext);
return new ReduceResult(finalCount, reducedAggregations);
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
for (Map.Entry<BytesRef, Query> entry : context.percolateQueries().entrySet()) {
try {
Query existsQuery = entry.getValue();
if (isNested) {
existsQuery = new BooleanQuery.Builder()
.add(existsQuery, Occur.MUST)
.add(Queries.newNonNestedFilter(), Occur.FILTER)
.build();
}
if (Lucene.exists(context.docSearcher(), existsQuery)) {
                        count++;
}
} catch (Throwable e) {
logger.debug("[" + entry.getKey() + "] failed to execute query", e);
throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
}
}
return new PercolateShardResponse(count, context, request.shardId());
}
};
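
    // Counts matches by searching the shard's stored percolator queries with the request's
    // percolate query and a counting collector.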
private final PercolatorType queryCountPercolator = new PercolatorType() {
@Override
public byte id() {
return 0x02;
}
@Override
public ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
return countPercolator.reduce(shardResults, headersContext);
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
Engine.Searcher percolatorSearcher = context.indexShard().acquireSearcher("percolate");
try {
Count countCollector = count(logger, context, isNested);
queryBasedPercolating(percolatorSearcher, context, countCollector, request);
count = countCollector.counter();
} catch (Throwable e) {
logger.warn("failed to execute", e);
} finally {
percolatorSearcher.close();
}
return new PercolateShardResponse(count, context, request.shardId());
}
};
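
    // Collects the ids of matching percolator queries (plus optional highlighting) by running
    // every registered query against the in-memory document index.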
private final PercolatorType matchPercolator = new PercolatorType() {
@Override
public byte id() {
return 0x03;
}
@Override
public ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
long foundMatches = 0;
int numMatches = 0;
for (PercolateShardResponse response : shardResults) {
foundMatches += response.count();
numMatches += response.matches().length;
}
int requestedSize = shardResults.get(0).requestedSize();
            // TODO: Use a custom impl of AbstractBigArray for Object[]?
List<PercolateResponse.Match> finalMatches = new ArrayList<>(requestedSize == 0 ? numMatches : requestedSize);
outer:
for (PercolateShardResponse response : shardResults) {
Text index = new Text(response.getIndex());
for (int i = 0; i < response.matches().length; i++) {
float score = response.scores().length == 0 ? NO_SCORE : response.scores()[i];
Text match = new Text(new BytesArray(response.matches()[i]));
Map<String, HighlightField> hl = response.hls().isEmpty() ? null : response.hls().get(i);
finalMatches.add(new PercolateResponse.Match(index, match, score, hl));
if (requestedSize != 0 && finalMatches.size() == requestedSize) {
break outer;
}
}
}
assert !shardResults.isEmpty();
InternalAggregations reducedAggregations = reduceAggregations(shardResults, headersContext);
return new ReduceResult(foundMatches, finalMatches.toArray(new PercolateResponse.Match[finalMatches.size()]), reducedAggregations);
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
List<BytesRef> matches = new ArrayList<>();
List<Map<String, HighlightField>> hls = new ArrayList<>();
Query tokenRangeQuery = clusterService.getTokenRangesService().getTokenRangesQuery(request.tokenRanges());
for (Map.Entry<BytesRef, Query> entry : context.percolateQueries().entrySet()) {
if (context.highlight() != null) {
context.parsedQuery(new ParsedQuery(entry.getValue()));
context.hitContext().cache().clear();
}
try {
                    Query existsQuery = entry.getValue();
                    if (isNested || tokenRangeQuery != null) {
                        BooleanQuery.Builder builder = new BooleanQuery.Builder();
                        builder.add(existsQuery, Occur.MUST);
                        if (isNested) {
                            builder.add(Queries.newNonNestedFilter(), Occur.FILTER);
                        }
                        if (tokenRangeQuery != null) {
                            // getTokenRangesQuery(...) may return null when the request carries
                            // no token ranges, so only add the filter when it is present
                            builder.add(tokenRangeQuery, Occur.FILTER);
                        }
                        existsQuery = builder.build();
                    }
if (Lucene.exists(context.docSearcher(), existsQuery)) {
if (!context.limit || count < context.size()) {
matches.add(entry.getKey());
if (context.highlight() != null) {
highlightPhase.hitExecute(context, context.hitContext());
hls.add(context.hitContext().hit().getHighlightFields());
}
}
count++;
}
} catch (Throwable e) {
logger.debug("[" + entry.getKey() + "] failed to execute query", e);
throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
}
}
BytesRef[] finalMatches = matches.toArray(new BytesRef[matches.size()]);
return new PercolateShardResponse(finalMatches, hls, count, context, request.shardId());
}
};
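
    // Like matchPercolator, but narrows the candidate percolator queries with the request's
    // percolate query before matching.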
private final PercolatorType queryPercolator = new PercolatorType() {
@Override
public byte id() {
return 0x04;
}
@Override
public ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
return matchPercolator.reduce(shardResults, headersContext);
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
Engine.Searcher percolatorSearcher = context.indexShard().acquireSearcher("percolate");
try {
Match match = match(logger, context, highlightPhase, isNested);
queryBasedPercolating(percolatorSearcher, context, match, request);
List<BytesRef> matches = match.matches();
List<Map<String, HighlightField>> hls = match.hls();
long count = match.counter();
BytesRef[] finalMatches = matches.toArray(new BytesRef[matches.size()]);
return new PercolateShardResponse(finalMatches, hls, count, context, request.shardId());
} catch (Throwable e) {
logger.debug("failed to execute", e);
throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
} finally {
percolatorSearcher.close();
}
}
};
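
    // Like queryPercolator, but additionally records the score of the percolate query for
    // every match.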
private final PercolatorType scoringPercolator = new PercolatorType() {
@Override
public byte id() {
return 0x05;
}
@Override
public ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
return matchPercolator.reduce(shardResults, headersContext);
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
Engine.Searcher percolatorSearcher = context.indexShard().acquireSearcher("percolate");
try {
MatchAndScore matchAndScore = matchAndScore(logger, context, highlightPhase, isNested);
queryBasedPercolating(percolatorSearcher, context, matchAndScore, request);
List<BytesRef> matches = matchAndScore.matches();
List<Map<String, HighlightField>> hls = matchAndScore.hls();
float[] scores = matchAndScore.scores().toArray();
long count = matchAndScore.counter();
BytesRef[] finalMatches = matches.toArray(new BytesRef[matches.size()]);
return new PercolateShardResponse(finalMatches, hls, count, scores, context, request.shardId());
} catch (Throwable e) {
logger.debug("failed to execute", e);
throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
} finally {
percolatorSearcher.close();
}
}
};
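
    // Returns the top matching percolator queries sorted by score; reduce() merges the
    // per-shard, score-sorted matches into a single ranked list.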
private final PercolatorType topMatchingPercolator = new PercolatorType() {
@Override
public byte id() {
return 0x06;
}
@Override
public ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
long foundMatches = 0;
int nonEmptyResponses = 0;
int firstNonEmptyIndex = 0;
for (int i = 0; i < shardResults.size(); i++) {
PercolateShardResponse response = shardResults.get(i);
foundMatches += response.count();
if (response.matches().length != 0) {
if (firstNonEmptyIndex == 0) {
firstNonEmptyIndex = i;
}
nonEmptyResponses++;
}
}
int requestedSize = shardResults.get(0).requestedSize();
            // TODO: Use a custom impl of AbstractBigArray for Object[]?
List<PercolateResponse.Match> finalMatches = new ArrayList<>(requestedSize);
if (nonEmptyResponses == 1) {
PercolateShardResponse response = shardResults.get(firstNonEmptyIndex);
Text index = new Text(response.getIndex());
for (int i = 0; i < response.matches().length; i++) {
float score = response.scores().length == 0 ? Float.NaN : response.scores()[i];
Text match = new Text(new BytesArray(response.matches()[i]));
if (!response.hls().isEmpty()) {
Map<String, HighlightField> hl = response.hls().get(i);
finalMatches.add(new PercolateResponse.Match(index, match, score, hl));
} else {
finalMatches.add(new PercolateResponse.Match(index, match, score));
}
}
} else {
int[] slots = new int[shardResults.size()];
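                // Merge the per-shard, score-sorted matches: slots[i] points at the next
                // unconsumed match of shard i, and each iteration consumes the highest-scored
                // head across all shards.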
while (true) {
                    float highestScore = Float.NEGATIVE_INFINITY;
                    int requestIndex = -1;
                    int itemIndex = -1;
                    for (int i = 0; i < shardResults.size(); i++) {
                        int scoreIndex = slots[i];
                        float[] scores = shardResults.get(i).scores();
                        if (scoreIndex >= scores.length) {
                            continue;
                        }
                        float score = scores[scoreIndex];
                        int cmp = Float.compare(highestScore, score);
                        // TODO: Maybe add a tie-breaker?
                        if (cmp < 0) {
                            requestIndex = i;
                            itemIndex = scoreIndex;
                            highestScore = score;
                        }
}
}
// This means the shard matches have been exhausted and we should bail
if (requestIndex == -1) {
break;
}
slots[requestIndex]++;
PercolateShardResponse shardResponse = shardResults.get(requestIndex);
Text index = new Text(shardResponse.getIndex());
Text match = new Text(new BytesArray(shardResponse.matches()[itemIndex]));
float score = shardResponse.scores()[itemIndex];
if (!shardResponse.hls().isEmpty()) {
Map<String, HighlightField> hl = shardResponse.hls().get(itemIndex);
finalMatches.add(new PercolateResponse.Match(index, match, score, hl));
} else {
finalMatches.add(new PercolateResponse.Match(index, match, score));
}
if (finalMatches.size() == requestedSize) {
break;
}
}
}
assert !shardResults.isEmpty();
InternalAggregations reducedAggregations = reduceAggregations(shardResults, headersContext);
return new ReduceResult(foundMatches, finalMatches.toArray(new PercolateResponse.Match[finalMatches.size()]), reducedAggregations);
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
Engine.Searcher percolatorSearcher = context.indexShard().acquireSearcher("percolate");
try {
MatchAndSort matchAndSort = QueryCollector.matchAndSort(logger, context, isNested);
queryBasedPercolating(percolatorSearcher, context, matchAndSort, request);
TopDocs topDocs = matchAndSort.topDocs();
long count = topDocs.totalHits;
List<BytesRef> matches = new ArrayList<>(topDocs.scoreDocs.length);
float[] scores = new float[topDocs.scoreDocs.length];
List<Map<String, HighlightField>> hls = null;
if (context.highlight() != null) {
hls = new ArrayList<>(topDocs.scoreDocs.length);
}
final MappedFieldType uidMapper = context.mapperService().smartNameFieldType(UidFieldMapper.NAME);
final IndexFieldData<?> uidFieldData = context.fieldData().getForField(uidMapper);
int i = 0;
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
int segmentIdx = ReaderUtil.subIndex(scoreDoc.doc, percolatorSearcher.reader().leaves());
LeafReaderContext atomicReaderContext = percolatorSearcher.reader().leaves().get(segmentIdx);
SortedBinaryDocValues values = uidFieldData.load(atomicReaderContext).getBytesValues();
final int localDocId = scoreDoc.doc - atomicReaderContext.docBase;
values.setDocument(localDocId);
final int numValues = values.count();
assert numValues == 1;
BytesRef bytes = Uid.splitUidIntoTypeAndId(values.valueAt(0))[1];
matches.add(BytesRef.deepCopyOf(bytes));
if (hls != null) {
Query query = context.percolateQueries().get(bytes);
context.parsedQuery(new ParsedQuery(query));
context.hitContext().cache().clear();
highlightPhase.hitExecute(context, context.hitContext());
hls.add(i, context.hitContext().hit().getHighlightFields());
}
scores[i++] = scoreDoc.score;
}
if (hls != null) {
return new PercolateShardResponse(matches.toArray(new BytesRef[matches.size()]), hls, count, scores, context, request.shardId());
} else {
return new PercolateShardResponse(matches.toArray(new BytesRef[matches.size()]), count, scores, context, request.shardId());
}
} catch (Throwable e) {
logger.debug("failed to execute", e);
throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
} finally {
percolatorSearcher.close();
}
}
};
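
    /**
     * Runs the percolate query against the shard's stored percolator queries, restricted to
     * the .percolator type, the optional alias filter and the shard's token ranges
     * (Elassandra), feeding every match to the given collector.
     */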
private void queryBasedPercolating(Engine.Searcher percolatorSearcher, PercolateContext context, QueryCollector percolateCollector, PercolateShardRequest request) throws IOException {
Query percolatorTypeFilter = context.indexService().mapperService().documentMapper(TYPE_NAME).typeFilter();
        if (logger.isDebugEnabled()) {
            logger.debug("percolate within tokenRanges = {}", request.tokenRanges());
        }
Query tokenRangeQuery = clusterService.getTokenRangesService().getTokenRangesQuery(request.tokenRanges());
final Query filter;
if (context.aliasFilter() != null) {
BooleanQuery.Builder booleanFilter = new BooleanQuery.Builder();
booleanFilter.add(context.aliasFilter(), BooleanClause.Occur.MUST);
booleanFilter.add(percolatorTypeFilter, BooleanClause.Occur.MUST);
            if (tokenRangeQuery != null) {
                booleanFilter.add(tokenRangeQuery, Occur.FILTER);
            }
filter = booleanFilter.build();
} else {
if (tokenRangeQuery != null) {
BooleanQuery.Builder booleanFilter = new BooleanQuery.Builder();
booleanFilter.add(percolatorTypeFilter, BooleanClause.Occur.MUST);
booleanFilter.add(tokenRangeQuery, Occur.FILTER);
filter = booleanFilter.build();
} else {
filter = percolatorTypeFilter;
}
}
Query query = Queries.filtered(context.percolateQuery(), filter);
percolatorSearcher.searcher().search(query, percolateCollector);
percolateCollector.aggregatorCollector.postCollection();
if (context.aggregations() != null) {
aggregationPhase.execute(context);
}
}
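
    /**
     * The reduced, cluster-wide result of a percolate request: total match count, the matches
     * themselves (absent for count-only requests) and the reduced aggregations.
     */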
    public static final class ReduceResult {
private final long count;
private final PercolateResponse.Match[] matches;
private final InternalAggregations reducedAggregations;
ReduceResult(long count, PercolateResponse.Match[] matches, InternalAggregations reducedAggregations) {
this.count = count;
this.matches = matches;
this.reducedAggregations = reducedAggregations;
}
public ReduceResult(long count, InternalAggregations reducedAggregations) {
this.count = count;
this.matches = null;
this.reducedAggregations = reducedAggregations;
}
public long count() {
return count;
}
public PercolateResponse.Match[] matches() {
return matches;
}
public InternalAggregations reducedAggregations() {
return reducedAggregations;
}
}
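
    /**
     * Reduces the shard-level aggregations into a single {@link InternalAggregations} instance
     * and applies any sibling pipeline aggregators to the reduced result.
     */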
private InternalAggregations reduceAggregations(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
if (shardResults.get(0).aggregations() == null) {
return null;
}
List<InternalAggregations> aggregationsList = new ArrayList<>(shardResults.size());
for (PercolateShardResponse shardResult : shardResults) {
aggregationsList.add(shardResult.aggregations());
}
InternalAggregations aggregations = InternalAggregations.reduce(aggregationsList, new ReduceContext(bigArrays, scriptService,
headersContext));
if (aggregations != null) {
List<SiblingPipelineAggregator> pipelineAggregators = shardResults.get(0).pipelineAggregators();
if (pipelineAggregators != null) {
List<InternalAggregation> newAggs = new ArrayList<>(eagerTransform(aggregations.asList(), PipelineAggregator.AGGREGATION_TRANFORM_FUNCTION));
for (SiblingPipelineAggregator pipelineAggregator : pipelineAggregators) {
InternalAggregation newAgg = pipelineAggregator.doReduce(new InternalAggregations(newAggs), new ReduceContext(
bigArrays, scriptService, headersContext));
newAggs.add(newAgg);
}
aggregations = new InternalAggregations(newAggs);
}
}
return aggregations;
}
}