/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

/** Utility class to do efficient primary-key (only 1 doc contains the
 *  given term) lookups by segment, re-using the enums.  This class is
 *  not thread safe, so it is the caller's job to create and use one
 *  instance of this per thread.  Do not use this if a term may appear
 *  in more than one document!  It will only return the first one it
 *  finds. */
public class PerThreadPKLookup {

  protected final TermsEnum[] termsEnums;
  protected final PostingsEnum[] postingsEnums;
  protected final Bits[] liveDocs;
  protected final int[] docBases;
  protected final int numSegs;
  protected final boolean hasDeletions;

  public PerThreadPKLookup(IndexReader r, String idFieldName) throws IOException {

    List<LeafReaderContext> leaves = new ArrayList<>(r.leaves());

    // Larger segments are more likely to have the id, so we sort largest to smallest by numDocs:
    Collections.sort(leaves, new Comparator<LeafReaderContext>() {
        @Override
        public int compare(LeafReaderContext c1, LeafReaderContext c2) {
          return c2.reader().numDocs() - c1.reader().numDocs();
        }
      });

    termsEnums = new TermsEnum[leaves.size()];
    postingsEnums = new PostingsEnum[leaves.size()];
    liveDocs = new Bits[leaves.size()];
    docBases = new int[leaves.size()];
    int numSegs = 0;
    boolean hasDeletions = false;
    for(int i=0;i<leaves.size();i++) {
      Terms terms = leaves.get(i).reader().terms(idFieldName);
      if (terms != null) {
        termsEnums[numSegs] = terms.iterator();
        assert termsEnums[numSegs] != null;
        docBases[numSegs] = leaves.get(i).docBase;
        liveDocs[numSegs] = leaves.get(i).reader().getLiveDocs();
        hasDeletions |= leaves.get(i).reader().hasDeletions();
        numSegs++;
      }
    }
    this.numSegs = numSegs;
    this.hasDeletions = hasDeletions;
  }

  /** Returns docID if found, else -1. */
  public int lookup(BytesRef id) throws IOException {
    for(int seg=0;seg<numSegs;seg++) {
      if (termsEnums[seg].seekExact(id)) {
        postingsEnums[seg] = termsEnums[seg].postings(postingsEnums[seg], 0);
        int docID = postingsEnums[seg].nextDoc();
        if (docID != PostingsEnum.NO_MORE_DOCS && (liveDocs[seg] == null || liveDocs[seg].get(docID))) {
          return docBases[seg] + docID;
        }
        assert hasDeletions;
      }
    }

    return -1;
  }

  // TODO: add reopen method to carry over re-used enums...?
}
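
// ---------------------------------------------------------------------------
// Hypothetical usage sketch (not part of the original class): since the
// javadoc above requires one PerThreadPKLookup instance per thread, a caller
// might hold instances in a ThreadLocal keyed to a single shared reader.
// The class name, the "id" field name, and the ThreadLocal holder below are
// illustrative assumptions, not part of the Lucene API.
class PerThreadPKLookupUsageExample {

  private final IndexReader reader;

  // One lookup instance per thread, lazily created against the same reader.
  private final ThreadLocal<PerThreadPKLookup> lookups = new ThreadLocal<>();

  PerThreadPKLookupUsageExample(IndexReader reader) {
    this.reader = reader;
  }

  /** Returns the global docID for the given primary key, or -1 if absent. */
  int docIDForKey(BytesRef id) throws IOException {
    PerThreadPKLookup lookup = lookups.get();
    if (lookup == null) {
      // Assumes the primary key is indexed under a field named "id".
      lookup = new PerThreadPKLookup(reader, "id");
      lookups.set(lookup);
    }
    return lookup.lookup(id);
  }
}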