/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.percolator;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.CheckedFunction;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.Lucene;

import java.io.IOException;
import java.util.Objects;
import java.util.Set;

/**
 * A query that matches the provided document against the percolator queries stored in the index.
 * The {@code candidateMatchesQuery} selects candidate percolator queries (the approximation phase);
 * each candidate is then verified by executing its stored query against an in-memory index that
 * holds the document being percolated. When scores are not required, candidates matched by the
 * {@code verifiedMatchesQuery} are accepted without that verification.
 */
final class PercolateQuery extends Query implements Accountable {

    // cost of matching the query against the document, arbitrary as it would be really complex to estimate
    public static final float MATCH_COST = 1000;

    private final String documentType;
    private final QueryStore queryStore;
    private final BytesReference documentSource;
    private final Query candidateMatchesQuery;
    private final Query verifiedMatchesQuery;
    private final IndexSearcher percolatorIndexSearcher;

    PercolateQuery(String documentType, QueryStore queryStore, BytesReference documentSource,
                   Query candidateMatchesQuery, IndexSearcher percolatorIndexSearcher, Query verifiedMatchesQuery) {
        this.documentType = Objects.requireNonNull(documentType);
        this.documentSource = Objects.requireNonNull(documentSource);
        this.candidateMatchesQuery = Objects.requireNonNull(candidateMatchesQuery);
        this.queryStore = Objects.requireNonNull(queryStore);
        this.percolatorIndexSearcher = Objects.requireNonNull(percolatorIndexSearcher);
        this.verifiedMatchesQuery = Objects.requireNonNull(verifiedMatchesQuery);
    }

    @Override
    public Query rewrite(IndexReader reader) throws IOException {
        Query rewritten = candidateMatchesQuery.rewrite(reader);
        if (rewritten != candidateMatchesQuery) {
            return new PercolateQuery(documentType, queryStore, documentSource, rewritten,
                    percolatorIndexSearcher, verifiedMatchesQuery);
        } else {
            return this;
        }
    }

    @Override
    public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
        final Weight verifiedMatchesWeight = verifiedMatchesQuery.createWeight(searcher, false, boost);
        final Weight candidateMatchesWeight = candidateMatchesQuery.createWeight(searcher, false, boost);
        return new Weight(this) {
            @Override
            public void extractTerms(Set<Term> set) {
            }

            @Override
            public Explanation explain(LeafReaderContext leafReaderContext, int docId) throws IOException {
                Scorer scorer = scorer(leafReaderContext);
                if (scorer != null) {
                    TwoPhaseIterator twoPhaseIterator = scorer.twoPhaseIterator();
                    int result = twoPhaseIterator.approximation().advance(docId);
                    if (result == docId) {
                        if (twoPhaseIterator.matches()) {
                            if (needsScores) {
                                CheckedFunction<Integer, Query, IOException> percolatorQueries =
                                        queryStore.getQueries(leafReaderContext);
                                Query query = percolatorQueries.apply(docId);
                                Explanation detail = percolatorIndexSearcher.explain(query, 0);
                                return Explanation.match(scorer.score(), "PercolateQuery", detail);
                            } else {
                                return Explanation.match(scorer.score(), "PercolateQuery");
                            }
                        }
                    }
                }
                return Explanation.noMatch("PercolateQuery");
            }

            @Override
            public Scorer scorer(LeafReaderContext leafReaderContext) throws IOException {
                final Scorer approximation = candidateMatchesWeight.scorer(leafReaderContext);
                if (approximation == null) {
                    return null;
                }

                final CheckedFunction<Integer, Query, IOException> queries = queryStore.getQueries(leafReaderContext);
                if (needsScores) {
                    return new BaseScorer(this, approximation, queries, percolatorIndexSearcher) {

                        float score;

                        @Override
                        boolean matchDocId(int docId) throws IOException {
                            Query query = percolatorQueries.apply(docId);
                            if (query != null) {
                                TopDocs topDocs = percolatorIndexSearcher.search(query, 1);
                                if (topDocs.totalHits > 0) {
                                    score = topDocs.scoreDocs[0].score;
                                    return true;
                                } else {
                                    return false;
                                }
                            } else {
                                return false;
                            }
                        }

                        @Override
                        public float score() throws IOException {
                            return score;
                        }
                    };
                } else {
                    Scorer verifiedDocsScorer = verifiedMatchesWeight.scorer(leafReaderContext);
                    Bits verifiedDocsBits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(),
                            verifiedDocsScorer);
                    return new BaseScorer(this, approximation, queries, percolatorIndexSearcher) {

                        @Override
                        public float score() throws IOException {
                            return 0f;
                        }

                        boolean matchDocId(int docId) throws IOException {
                            // We use the verifiedDocsBits to skip the expensive MemoryIndex verification.
                            // If docId also appears in the verifiedDocsBits then that means during indexing
                            // we were able to extract all query terms for this candidate match and, based on
                            // the nature of the query, determined that it is safe to skip the MemoryIndex
                            // verification.
                            if (verifiedDocsBits.get(docId)) {
                                return true;
                            }
                            Query query = percolatorQueries.apply(docId);
                            return query != null && Lucene.exists(percolatorIndexSearcher, query);
                        }
                    };
                }
            }
        };
    }

    public IndexSearcher getPercolatorIndexSearcher() {
        return percolatorIndexSearcher;
    }

    public String getDocumentType() {
        return documentType;
    }

    public BytesReference getDocumentSource() {
        return documentSource;
    }

    public QueryStore getQueryStore() {
        return queryStore;
    }

    // Comparing identity here to avoid being cached.
    // Note that in theory, if the same instance gets used multiple times it could still get cached;
    // however, since we create a new query instance each time we use this query, this shouldn't happen
    // and thus the risk is negligible.
    @Override
    public boolean equals(Object o) {
        return this == o;
    }

    // Computing hashcode based on identity to avoid caching.
    @Override
    public int hashCode() {
        return System.identityHashCode(this);
    }

    @Override
    public String toString(String s) {
        return "PercolateQuery{document_type={" + documentType + "},document_source={" +
                documentSource.utf8ToString() + "},inner={" + candidateMatchesQuery.toString(s) + "}}";
    }

    @Override
    public long ramBytesUsed() {
        return documentSource.ramBytesUsed();
    }

    @FunctionalInterface
    interface QueryStore {

        CheckedFunction<Integer, Query, IOException> getQueries(LeafReaderContext ctx) throws IOException;

    }

    abstract static class BaseScorer extends Scorer {

        final Scorer approximation;
        final CheckedFunction<Integer, Query, IOException> percolatorQueries;
        final IndexSearcher percolatorIndexSearcher;

        BaseScorer(Weight weight, Scorer approximation,
                   CheckedFunction<Integer, Query, IOException> percolatorQueries,
                   IndexSearcher percolatorIndexSearcher) {
            super(weight);
            this.approximation = approximation;
            this.percolatorQueries = percolatorQueries;
            this.percolatorIndexSearcher = percolatorIndexSearcher;
        }

        @Override
        public final DocIdSetIterator iterator() {
            return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
        }

        @Override
        public final TwoPhaseIterator twoPhaseIterator() {
            return new TwoPhaseIterator(approximation.iterator()) {
                @Override
                public boolean matches() throws IOException {
                    return matchDocId(approximation.docID());
                }

                @Override
                public float matchCost() {
                    return MATCH_COST;
                }
            };
        }

        @Override
        public final int freq() throws IOException {
            return approximation.freq();
        }

        @Override
        public final int docID() {
            return approximation.docID();
        }

        abstract boolean matchDocId(int docId) throws IOException;

    }

}
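
// Illustrative sketch (hypothetical, not part of this class): because QueryStore is a functional
// interface whose getQueries(LeafReaderContext) returns a per-document CheckedFunction, a caller
// could supply it as a nested lambda. The loadQueryForDoc helper below is a made-up stand-in for
// however the stored percolator queries are actually resolved for a given leaf and doc id.
//
//     PercolateQuery.QueryStore store = leafCtx -> {
//         // Look up the percolator query for each candidate doc id in this leaf.
//         // Returning null means the doc holds no query and is treated as a non-match.
//         return docId -> loadQueryForDoc(leafCtx, docId);
//     };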