/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.common.lucene.index; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FilteredDocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; import org.apache.lucene.util.BitSet; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Nullable; import java.io.IOException; import java.util.ArrayList; import java.util.List; /** * A frequency TermsEnum that returns frequencies derived from a collection of * cached leaf termEnums. It also allows to provide a filter to explicitly * compute frequencies only for docs that match the filter (heavier!). */ public class FilterableTermsEnum extends TermsEnum { static class Holder { final TermsEnum termsEnum; @Nullable PostingsEnum docsEnum; @Nullable final Bits bits; Holder(TermsEnum termsEnum, Bits bits) { this.termsEnum = termsEnum; this.bits = bits; } } static final String UNSUPPORTED_MESSAGE = "This TermsEnum only supports #seekExact(BytesRef) as well as #docFreq() and #totalTermFreq()"; protected static final int NOT_FOUND = -1; private final Holder[] enums; protected int currentDocFreq = 0; protected long currentTotalTermFreq = 0; protected BytesRef current; protected final int docsEnumFlag; public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter) throws IOException { if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) { throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag); } this.docsEnumFlag = docsEnumFlag; List<LeafReaderContext> leaves = reader.leaves(); List<Holder> enums = new ArrayList<>(leaves.size()); final Weight weight; if (filter == null) { weight = null; } else { final IndexSearcher searcher = new IndexSearcher(reader); searcher.setQueryCache(null); weight = searcher.createNormalizedWeight(filter, false); } for (LeafReaderContext context : leaves) { Terms terms = context.reader().terms(field); if (terms == null) { continue; } TermsEnum termsEnum = terms.iterator(); if (termsEnum == null) { continue; } BitSet bits = null; if (weight != null) { Scorer scorer = weight.scorer(context); if (scorer == null) { // fully filtered, none matching, no need to iterate on this continue; } DocIdSetIterator docs = scorer.iterator(); // we want to force apply deleted docs final Bits liveDocs = context.reader().getLiveDocs(); if (liveDocs != null) { docs = new FilteredDocIdSetIterator(docs) { @Override protected boolean match(int doc) { return liveDocs.get(doc); } }; } bits = BitSet.of(docs, context.reader().maxDoc()); } enums.add(new Holder(termsEnum, bits)); } this.enums = enums.toArray(new Holder[enums.size()]); } @Override public BytesRef term() throws IOException { return current; } @Override public boolean seekExact(BytesRef text) throws IOException { int docFreq = 0; long totalTermFreq = 0; for (Holder anEnum : enums) { if (anEnum.termsEnum.seekExact(text)) { if (anEnum.bits == null) { docFreq += anEnum.termsEnum.docFreq(); if (docsEnumFlag == PostingsEnum.FREQS) { long leafTotalTermFreq = anEnum.termsEnum.totalTermFreq(); if (totalTermFreq == -1 || leafTotalTermFreq == -1) { totalTermFreq = -1; continue; } totalTermFreq += leafTotalTermFreq; } } else { final PostingsEnum docsEnum = anEnum.docsEnum = anEnum.termsEnum.postings(anEnum.docsEnum, docsEnumFlag); // 2 choices for performing same heavy loop - one attempts to calculate totalTermFreq and other does not if (docsEnumFlag == PostingsEnum.FREQS) { for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { if (anEnum.bits != null && anEnum.bits.get(docId) == false) { continue; } docFreq++; // docsEnum.freq() returns 1 if doc indexed with IndexOptions.DOCS_ONLY so no way of knowing if value // is really 1 or unrecorded when filtering like this totalTermFreq += docsEnum.freq(); } } else { for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { if (anEnum.bits != null && anEnum.bits.get(docId) == false) { continue; } // docsEnum.freq() behaviour is undefined if docsEnumFlag==PostingsEnum.FLAG_NONE so don't bother with call docFreq++; } } } } } if (docFreq > 0) { currentDocFreq = docFreq; currentTotalTermFreq = totalTermFreq; current = text; return true; } else { currentDocFreq = NOT_FOUND; currentTotalTermFreq = NOT_FOUND; current = null; return false; } } @Override public int docFreq() throws IOException { return currentDocFreq; } @Override public long totalTermFreq() throws IOException { return currentTotalTermFreq; } @Override public void seekExact(long ord) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MESSAGE); } @Override public SeekStatus seekCeil(BytesRef text) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MESSAGE); } @Override public long ord() throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MESSAGE); } @Override public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MESSAGE); } @Override public BytesRef next() throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MESSAGE); } }