/* * Licensed to Crate under one or more contributor license agreements. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. Crate licenses this file * to you under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. * * However, if you have executed another commercial license agreement * with Crate these terms will supersede the license and you may use the * software solely pursuant to the terms of the relevant commercial * agreement. */ package io.crate.operation.collect.collectors; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Iterables; import io.crate.analyze.OrderBy; import io.crate.analyze.symbol.Symbol; import io.crate.data.Input; import io.crate.data.Row; import io.crate.lucene.FieldTypeLookup; import io.crate.metadata.Reference; import io.crate.operation.merge.KeyIterable; import io.crate.operation.reference.doc.lucene.CollectorContext; import io.crate.operation.reference.doc.lucene.LuceneCollectorExpression; import io.crate.operation.reference.doc.lucene.LuceneMissingValue; import org.apache.lucene.search.*; import org.apache.logging.log4j.Logger; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.lucene.MinimumScoreCollector; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.shard.ShardId; import javax.annotation.Nullable; import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.List; import static java.util.Objects.requireNonNull; public class LuceneOrderedDocCollector extends OrderedDocCollector { private static final Logger LOGGER = Loggers.getLogger(LuceneOrderedDocCollector.class); private final Query query; private final Float minScore; private final boolean doDocsScores; private final int batchSize; private final FieldTypeLookup fieldTypeLookup; private final CollectorContext collectorContext; private final OrderBy orderBy; private final Sort sort; private final Collection<? extends LuceneCollectorExpression<?>> expressions; private final ScoreDocRowFunction rowFunction; private final DummyScorer scorer; private final IndexSearcher searcher; private final Object[] missingValues; @Nullable private volatile FieldDoc lastDoc = null; public LuceneOrderedDocCollector(ShardId shardId, IndexSearcher searcher, Query query, Float minScore, boolean doDocsScores, int batchSize, FieldTypeLookup fieldTypeLookup, CollectorContext collectorContext, OrderBy orderBy, Sort sort, List<? extends Input<?>> inputs, Collection<? extends LuceneCollectorExpression<?>> expressions) { super(shardId); this.searcher = searcher; this.query = query; this.minScore = minScore; this.doDocsScores = doDocsScores; this.batchSize = batchSize; this.fieldTypeLookup = fieldTypeLookup; this.collectorContext = collectorContext; this.orderBy = orderBy; this.sort = sort; this.scorer = new DummyScorer(); this.expressions = expressions; this.rowFunction = new ScoreDocRowFunction( searcher.getIndexReader(), inputs, expressions, scorer ); missingValues = new Object[orderBy.orderBySymbols().size()]; for (int i = 0; i < orderBy.orderBySymbols().size(); i++) { missingValues[i] = LuceneMissingValue.missingValue(orderBy, i); } } /** * On the first call this will do an initial search and provide {@link #batchSize} number of rows * (or less if there aren't more available) * </p> * On subsequent calls it will return more rows (max {@link #batchSize} or less. * These rows are always the rows that come after the last row of the previously returned rows * <p/> * Basically, calling this function multiple times pages through the shard in batches. */ @Override public KeyIterable<ShardId, Row> collect() { try { if (lastDoc == null) { return initialSearch(); } return searchMore(); } catch (IOException e) { throw new RuntimeException(e); } } @Override public void close() { } private KeyIterable<ShardId, Row> initialSearch() throws IOException { for (LuceneCollectorExpression<?> expression : expressions) { expression.startCollect(collectorContext); expression.setScorer(scorer); } TopFieldCollector topFieldCollector = TopFieldCollector.create(sort, batchSize, true, doDocsScores, doDocsScores); Collector collector = topFieldCollector; if (minScore != null) { collector = new MinimumScoreCollector(collector, minScore); } searcher.search(query, collector); return scoreDocToIterable(topFieldCollector.topDocs().scoreDocs); } private KeyIterable<ShardId, Row> searchMore() throws IOException { if (exhausted()) { LOGGER.trace("searchMore but EXHAUSTED"); return empty(); } LOGGER.debug("searchMore from [{}]", lastDoc); TopDocs topDocs = searcher.searchAfter(lastDoc, query(lastDoc), batchSize, sort, doDocsScores, false); return scoreDocToIterable(topDocs.scoreDocs); } private KeyIterable<ShardId, Row> scoreDocToIterable(ScoreDoc[] scoreDocs) { exhausted = scoreDocs.length < batchSize; if (scoreDocs.length > 0) { lastDoc = (FieldDoc) scoreDocs[scoreDocs.length - 1]; } return new KeyIterable<>(shardId(), Iterables.transform(Arrays.asList(scoreDocs), rowFunction)); } private Query query(FieldDoc lastDoc) { Query query = nextPageQuery(lastDoc, orderBy, missingValues, fieldTypeLookup); if (query == null) { return this.query; } BooleanQuery.Builder searchAfterQuery = new BooleanQuery.Builder(); searchAfterQuery.add(this.query, BooleanClause.Occur.MUST); searchAfterQuery.add(query, BooleanClause.Occur.MUST_NOT); return searchAfterQuery.build(); } @Nullable @VisibleForTesting static Query nextPageQuery(FieldDoc lastCollected, OrderBy orderBy, Object[] missingValues, FieldTypeLookup fieldTypeLookup) { BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); for (int i = 0; i < orderBy.orderBySymbols().size(); i++) { Symbol order = orderBy.orderBySymbols().get(i); Object value = lastCollected.fields[i]; if (order instanceof Reference) { boolean nullsFirst = orderBy.nullsFirst()[i] == null ? false : orderBy.nullsFirst()[i]; value = value == null || value.equals(missingValues[i]) ? null : value; if (nullsFirst && value == null) { // no filter needed continue; } String columnName = ((Reference) order).ident().columnIdent().fqn(); MappedFieldType fieldType = requireNonNull( fieldTypeLookup.get(columnName), "Column must exist: " + columnName); Query orderQuery; // nulls already gone, so they should be excluded if (nullsFirst) { BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); booleanQuery.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); if (orderBy.reverseFlags()[i]) { booleanQuery.add(fieldType.rangeQuery(null, value, false, true), BooleanClause.Occur.MUST_NOT); } else { booleanQuery.add(fieldType.rangeQuery(value, null, true, false), BooleanClause.Occur.MUST_NOT); } orderQuery = booleanQuery.build(); } else { if (orderBy.reverseFlags()[i]) { orderQuery = fieldType.rangeQuery(value, null, false, false); } else { orderQuery = fieldType.rangeQuery(null, value, false, false); } } queryBuilder.add(orderQuery, BooleanClause.Occur.MUST); } } BooleanQuery query = queryBuilder.build(); if (query.clauses().size() > 0) { return query; } else { return null; } } }