package org.apache.lucene.search.cache; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldCache.DocTerms; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.packed.GrowableWriter; import org.apache.lucene.util.packed.PackedInts; // TODO: this if DocTermsIndex was already created, we should share it... public class DocTermsCreator extends EntryCreatorWithOptions<DocTerms> { public static final int FASTER_BUT_MORE_RAM = 2; public String field; public DocTermsCreator( String field ) { super( FASTER_BUT_MORE_RAM ); // By default turn on FASTER_BUT_MORE_RAM if( field == null ) { throw new IllegalArgumentException( "field can not be null" ); } this.field = field; } public DocTermsCreator( String field, int flags ) { super( flags ); if( field == null ) { throw new IllegalArgumentException( "field can not be null" ); } this.field = field; } @Override public SimpleEntryKey getCacheKey() { return new SimpleEntryKey( DocTermsCreator.class, field ); } @Override public DocTerms create(IndexReader reader) throws IOException { String field = StringHelper.intern(this.field); // TODO?? necessary? Terms terms = MultiFields.getTerms(reader, field); final boolean fasterButMoreRAM = hasOption( FASTER_BUT_MORE_RAM ); final int termCountHardLimit = reader.maxDoc(); // Holds the actual term data, expanded. final PagedBytes bytes = new PagedBytes(15); int startBPV; if (terms != null) { // Try for coarse estimate for number of bits; this // should be an underestimate most of the time, which // is fine -- GrowableWriter will reallocate as needed long numUniqueTerms = 0; try { numUniqueTerms = terms.getUniqueTermCount(); } catch (UnsupportedOperationException uoe) { numUniqueTerms = -1; } if (numUniqueTerms != -1) { if (numUniqueTerms > termCountHardLimit) { numUniqueTerms = termCountHardLimit; } startBPV = PackedInts.bitsRequired(numUniqueTerms*4); } else { startBPV = 1; } } else { startBPV = 1; } final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), fasterButMoreRAM); // pointer==0 means not set bytes.copyUsingLengthPrefix(new BytesRef()); if (terms != null) { int termCount = 0; final TermsEnum termsEnum = terms.iterator(); final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; while(true) { if (termCount++ == termCountHardLimit) { // app is misusing the API (there is more than // one term per doc); in this case we make best // effort to load what we can (see LUCENE-2142) break; } final BytesRef term = termsEnum.next(); if (term == null) { break; } final long pointer = bytes.copyUsingLengthPrefix(term); docs = termsEnum.docs(delDocs, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } docToOffset.set(docID, pointer); } } } // maybe an int-only impl? return new DocTermsImpl(bytes.freeze(true), docToOffset.getMutable()); } @Override public DocTerms validate(DocTerms entry, IndexReader reader) throws IOException { // TODO? nothing? perhaps subsequent call with FASTER_BUT_MORE_RAM? return entry; } private static class DocTermsImpl extends DocTerms { private final PagedBytes.Reader bytes; private final PackedInts.Reader docToOffset; public DocTermsImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) { this.bytes = bytes; this.docToOffset = docToOffset; } @Override public int size() { return docToOffset.size(); } @Override public boolean exists(int docID) { return docToOffset.get(docID) == 0; } @Override public BytesRef getTerm(int docID, BytesRef ret) { final long pointer = docToOffset.get(docID); return bytes.fill(ret, pointer); } } }