package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.PrintStream;
import java.util.*;

import org.apache.lucene.index.*;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.FieldCacheSanityChecker;

/**
 * Expert: The default cache implementation, storing all values in memory.
 * A WeakHashMap is used for storage.
 *
 * <p>Created: May 19, 2004 4:40:36 PM
 *
 * @since lucene 1.4
 */
class FieldCacheImpl implements FieldCache {

  private Map<Class<?>,Cache> caches;

  FieldCacheImpl() {
    init();
  }

  private synchronized void init() {
    caches = new HashMap<Class<?>,Cache>(7);
    caches.put(Byte.TYPE, new ByteCache(this));
    caches.put(Short.TYPE, new ShortCache(this));
    caches.put(Integer.TYPE, new IntCache(this));
    caches.put(Float.TYPE, new FloatCache(this));
    caches.put(Long.TYPE, new LongCache(this));
    caches.put(Double.TYPE, new DoubleCache(this));
    caches.put(DocTermsIndex.class, new DocTermsIndexCache(this));
    caches.put(DocTerms.class, new DocTermsCache(this));
  }

  public synchronized void purgeAllCaches() {
    init();
  }

  public synchronized void purge(IndexReader r) {
    for(Cache c : caches.values()) {
      c.purge(r);
    }
  }

  public synchronized CacheEntry[] getCacheEntries() {
    List<CacheEntry> result = new ArrayList<CacheEntry>(17);
    for(final Map.Entry<Class<?>,Cache> cacheEntry : caches.entrySet()) {
      final Cache cache = cacheEntry.getValue();
      final Class<?> cacheType = cacheEntry.getKey();
      synchronized(cache.readerCache) {
        for (final Map.Entry<Object,Map<Entry,Object>> readerCacheEntry : cache.readerCache.entrySet()) {
          final Object readerKey = readerCacheEntry.getKey();
          if (readerKey == null) continue;
          final Map<Entry,Object> innerCache = readerCacheEntry.getValue();
          for (final Map.Entry<Entry,Object> mapEntry : innerCache.entrySet()) {
            Entry entry = mapEntry.getKey();
            result.add(new CacheEntryImpl(readerKey, entry.field,
                                          cacheType, entry.custom,
                                          mapEntry.getValue()));
          }
        }
      }
    }
    return result.toArray(new CacheEntry[result.size()]);
  }
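
  // Usage sketch (added for illustration; not part of the original source --
  // "reader" and the field name are hypothetical): an application can
  // snapshot what is currently cached, e.g. while debugging memory use:
  //
  //   FieldCache fc = FieldCache.DEFAULT;
  //   fc.getInts(reader, "price");  // populates the int cache
  //   for (FieldCache.CacheEntry e : fc.getCacheEntries()) {
  //     System.out.println(e.getFieldName() + " -> " + e.getCacheType());
  //   }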
  private static final class CacheEntryImpl extends CacheEntry {
    private final Object readerKey;
    private final String fieldName;
    private final Class<?> cacheType;
    private final Object custom;
    private final Object value;
    CacheEntryImpl(Object readerKey, String fieldName,
                   Class<?> cacheType,
                   Object custom,
                   Object value) {
      this.readerKey = readerKey;
      this.fieldName = fieldName;
      this.cacheType = cacheType;
      this.custom = custom;
      this.value = value;

      // :HACK: for testing.
//       if (null != locale || SortField.CUSTOM != sortFieldType) {
//         throw new RuntimeException("Locale/sortFieldType: " + this);
//       }
    }
    @Override
    public Object getReaderKey() { return readerKey; }
    @Override
    public String getFieldName() { return fieldName; }
    @Override
    public Class<?> getCacheType() { return cacheType; }
    @Override
    public Object getCustom() { return custom; }
    @Override
    public Object getValue() { return value; }
  }

  /**
   * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops
   * processing terms and returns the current FieldCache
   * array.
   */
  static final class StopFillCacheException extends RuntimeException {
  }

  /** Expert: Internal cache. */
  abstract static class Cache {
    Cache() {
      this.wrapper = null;
    }

    Cache(FieldCache wrapper) {
      this.wrapper = wrapper;
    }

    final FieldCache wrapper;

    final Map<Object,Map<Entry,Object>> readerCache = new WeakHashMap<Object,Map<Entry,Object>>();

    protected abstract Object createValue(IndexReader reader, Entry key) throws IOException;

    /** Remove this reader from the cache, if present. */
    public void purge(IndexReader r) {
      Object readerKey = r.getCoreCacheKey();
      synchronized(readerCache) {
        readerCache.remove(readerKey);
      }
    }

    public Object get(IndexReader reader, Entry key) throws IOException {
      Map<Entry,Object> innerCache;
      Object value;
      final Object readerKey = reader.getCoreCacheKey();
      synchronized (readerCache) {
        innerCache = readerCache.get(readerKey);
        if (innerCache == null) {
          innerCache = new HashMap<Entry,Object>();
          readerCache.put(readerKey, innerCache);
          value = null;
        } else {
          value = innerCache.get(key);
        }
        if (value == null) {
          value = new CreationPlaceholder();
          innerCache.put(key, value);
        }
      }
      if (value instanceof CreationPlaceholder) {
        synchronized (value) {
          CreationPlaceholder progress = (CreationPlaceholder) value;
          if (progress.value == null) {
            progress.value = createValue(reader, key);
            synchronized (readerCache) {
              innerCache.put(key, progress.value);
            }

            // Only check if key.custom (the parser) is
            // non-null; else, we check twice for a single
            // call to FieldCache.getXXX
            if (key.custom != null && wrapper != null) {
              final PrintStream infoStream = wrapper.getInfoStream();
              if (infoStream != null) {
                printNewInsanity(infoStream, progress.value);
              }
            }
          }
          return progress.value;
        }
      }
      return value;
    }

    private void printNewInsanity(PrintStream infoStream, Object value) {
      final FieldCacheSanityChecker.Insanity[] insanities = FieldCacheSanityChecker.checkSanity(wrapper);
      for(int i=0;i<insanities.length;i++) {
        final FieldCacheSanityChecker.Insanity insanity = insanities[i];
        final CacheEntry[] entries = insanity.getCacheEntries();
        for(int j=0;j<entries.length;j++) {
          if (entries[j].getValue() == value) {
            // OK this insanity involves our entry
            infoStream.println("WARNING: new FieldCache insanity created\nDetails: " + insanity.toString());
            infoStream.println("\nStack:\n");
            new Throwable().printStackTrace(infoStream);
            break;
          }
        }
      }
    }
  }
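
  // Note (commentary added for clarity; not in the original source):
  // Cache.get() above uses CreationPlaceholder as a per-key latch.  The
  // first caller installs the placeholder while holding the readerCache
  // lock, then computes the value holding only the placeholder's monitor,
  // so lookups for other keys and readers are not blocked; a concurrent
  // caller for the same key waits on the placeholder until the computed
  // value is published back into the inner map.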
  /** Expert: Every composite-key in the internal cache is of this type. */
  static class Entry {
    final String field;        // which Fieldable
    final Object custom;       // which custom comparator or parser

    /** Creates one of these objects for a custom comparator/parser. */
    Entry(String field, Object custom) {
      this.field = StringHelper.intern(field);
      this.custom = custom;
    }

    /** Two of these are equal iff they reference the same field and type. */
    @Override
    public boolean equals(Object o) {
      if (o instanceof Entry) {
        Entry other = (Entry) o;
        if (other.field == field) {
          if (other.custom == null) {
            if (custom == null) return true;
          } else if (other.custom.equals(custom)) {
            return true;
          }
        }
      }
      return false;
    }

    /** Composes a hashcode based on the field and type. */
    @Override
    public int hashCode() {
      return field.hashCode() ^ (custom == null ? 0 : custom.hashCode());
    }
  }

  // inherit javadocs
  public byte[] getBytes(IndexReader reader, String field) throws IOException {
    return getBytes(reader, field, null);
  }

  // inherit javadocs
  public byte[] getBytes(IndexReader reader, String field, ByteParser parser) throws IOException {
    return (byte[]) caches.get(Byte.TYPE).get(reader, new Entry(field, parser));
  }

  static final class ByteCache extends Cache {
    ByteCache(FieldCache wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(IndexReader reader, Entry entryKey) throws IOException {
      Entry entry = entryKey;
      String field = entry.field;
      ByteParser parser = (ByteParser) entry.custom;
      if (parser == null) {
        return wrapper.getBytes(reader, field, FieldCache.DEFAULT_BYTE_PARSER);
      }
      final byte[] retArray = new byte[reader.maxDoc()];
      Terms terms = MultiFields.getTerms(reader, field);
      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator();
        final Bits delDocs = MultiFields.getDeletedDocs(reader);
        DocsEnum docs = null;
        try {
          while(true) {
            final BytesRef term = termsEnum.next();
            if (term == null) {
              break;
            }
            final byte termval = parser.parseByte(term);
            docs = termsEnum.docs(delDocs, docs);
            while (true) {
              final int docID = docs.nextDoc();
              if (docID == DocsEnum.NO_MORE_DOCS) {
                break;
              }
              retArray[docID] = termval;
            }
          }
        } catch (StopFillCacheException stop) {
        }
      }
      return retArray;
    }
  }

  // inherit javadocs
  public short[] getShorts(IndexReader reader, String field) throws IOException {
    return getShorts(reader, field, null);
  }

  // inherit javadocs
  public short[] getShorts(IndexReader reader, String field, ShortParser parser) throws IOException {
    return (short[]) caches.get(Short.TYPE).get(reader, new Entry(field, parser));
  }

  static final class ShortCache extends Cache {
    ShortCache(FieldCache wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(IndexReader reader, Entry entryKey) throws IOException {
      Entry entry = entryKey;
      String field = entry.field;
      ShortParser parser = (ShortParser) entry.custom;
      if (parser == null) {
        return wrapper.getShorts(reader, field, FieldCache.DEFAULT_SHORT_PARSER);
      }
      final short[] retArray = new short[reader.maxDoc()];
      Terms terms = MultiFields.getTerms(reader, field);
      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator();
        final Bits delDocs = MultiFields.getDeletedDocs(reader);
        DocsEnum docs = null;
        try {
          while(true) {
            final BytesRef term = termsEnum.next();
            if (term == null) {
              break;
            }
            final short termval = parser.parseShort(term);
            docs = termsEnum.docs(delDocs, docs);
            while (true) {
              final int docID = docs.nextDoc();
              if (docID == DocsEnum.NO_MORE_DOCS) {
                break;
              }
              retArray[docID] = termval;
            }
          }
        } catch (StopFillCacheException stop) {
        }
      }
      return retArray;
    }
  }

  // inherit javadocs
  public int[] getInts(IndexReader reader, String field) throws IOException {
    return getInts(reader, field, null);
  }

  // inherit javadocs
  public int[] getInts(IndexReader reader, String field, IntParser parser) throws IOException {
    return (int[]) caches.get(Integer.TYPE).get(reader, new Entry(field, parser));
  }
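
  // Sketch (hypothetical example, not part of the original source): the
  // parser argument lets a caller decide how terms decode to values, e.g.
  // for plain-text integers:
  //
  //   int[] vals = FieldCache.DEFAULT.getInts(reader, "price",
  //       new FieldCache.IntParser() {
  //         public int parseInt(BytesRef term) {
  //           return Integer.parseInt(term.utf8ToString());
  //         }
  //       });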
  static final class IntCache extends Cache {
    IntCache(FieldCache wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(IndexReader reader, Entry entryKey) throws IOException {
      Entry entry = entryKey;
      String field = entry.field;
      IntParser parser = (IntParser) entry.custom;
      if (parser == null) {
        try {
          return wrapper.getInts(reader, field, DEFAULT_INT_PARSER);
        } catch (NumberFormatException ne) {
          return wrapper.getInts(reader, field, NUMERIC_UTILS_INT_PARSER);
        }
      }
      int[] retArray = null;
      Terms terms = MultiFields.getTerms(reader, field);
      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator();
        final Bits delDocs = MultiFields.getDeletedDocs(reader);
        DocsEnum docs = null;
        try {
          while(true) {
            final BytesRef term = termsEnum.next();
            if (term == null) {
              break;
            }
            final int termval = parser.parseInt(term);
            if (retArray == null) {
              // late init so numeric fields don't double allocate
              retArray = new int[reader.maxDoc()];
            }
            docs = termsEnum.docs(delDocs, docs);
            while (true) {
              final int docID = docs.nextDoc();
              if (docID == DocsEnum.NO_MORE_DOCS) {
                break;
              }
              retArray[docID] = termval;
            }
          }
        } catch (StopFillCacheException stop) {
        }
      }
      if (retArray == null) {
        // no values
        retArray = new int[reader.maxDoc()];
      }
      return retArray;
    }
  }

  // inherit javadocs
  public float[] getFloats(IndexReader reader, String field) throws IOException {
    return getFloats(reader, field, null);
  }

  // inherit javadocs
  public float[] getFloats(IndexReader reader, String field, FloatParser parser) throws IOException {
    return (float[]) caches.get(Float.TYPE).get(reader, new Entry(field, parser));
  }

  static final class FloatCache extends Cache {
    FloatCache(FieldCache wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(IndexReader reader, Entry entryKey) throws IOException {
      Entry entry = entryKey;
      String field = entry.field;
      FloatParser parser = (FloatParser) entry.custom;
      if (parser == null) {
        try {
          return wrapper.getFloats(reader, field, DEFAULT_FLOAT_PARSER);
        } catch (NumberFormatException ne) {
          return wrapper.getFloats(reader, field, NUMERIC_UTILS_FLOAT_PARSER);
        }
      }
      float[] retArray = null;
      Terms terms = MultiFields.getTerms(reader, field);
      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator();
        final Bits delDocs = MultiFields.getDeletedDocs(reader);
        DocsEnum docs = null;
        try {
          while(true) {
            final BytesRef term = termsEnum.next();
            if (term == null) {
              break;
            }
            final float termval = parser.parseFloat(term);
            if (retArray == null) {
              // late init so numeric fields don't double allocate
              retArray = new float[reader.maxDoc()];
            }
            docs = termsEnum.docs(delDocs, docs);
            while (true) {
              final int docID = docs.nextDoc();
              if (docID == DocsEnum.NO_MORE_DOCS) {
                break;
              }
              retArray[docID] = termval;
            }
          }
        } catch (StopFillCacheException stop) {
        }
      }
      if (retArray == null) {
        // no values
        retArray = new float[reader.maxDoc()];
      }
      return retArray;
    }
  }
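
  // Note (commentary added for clarity; not in the original source): when no
  // parser is supplied, the int, float, long and double caches first try the
  // string-based DEFAULT_*_PARSER and, on NumberFormatException, retry with
  // the NUMERIC_UTILS_*_PARSER, so both plain-text numbers and
  // NumericField-encoded terms load transparently.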
  public long[] getLongs(IndexReader reader, String field) throws IOException {
    return getLongs(reader, field, null);
  }

  // inherit javadocs
  public long[] getLongs(IndexReader reader, String field, FieldCache.LongParser parser) throws IOException {
    return (long[]) caches.get(Long.TYPE).get(reader, new Entry(field, parser));
  }

  static final class LongCache extends Cache {
    LongCache(FieldCache wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(IndexReader reader, Entry entry) throws IOException {
      String field = entry.field;
      FieldCache.LongParser parser = (FieldCache.LongParser) entry.custom;
      if (parser == null) {
        try {
          return wrapper.getLongs(reader, field, DEFAULT_LONG_PARSER);
        } catch (NumberFormatException ne) {
          return wrapper.getLongs(reader, field, NUMERIC_UTILS_LONG_PARSER);
        }
      }
      long[] retArray = null;
      Terms terms = MultiFields.getTerms(reader, field);
      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator();
        final Bits delDocs = MultiFields.getDeletedDocs(reader);
        DocsEnum docs = null;
        try {
          while(true) {
            final BytesRef term = termsEnum.next();
            if (term == null) {
              break;
            }
            final long termval = parser.parseLong(term);
            if (retArray == null) {
              // late init so numeric fields don't double allocate
              retArray = new long[reader.maxDoc()];
            }
            docs = termsEnum.docs(delDocs, docs);
            while (true) {
              final int docID = docs.nextDoc();
              if (docID == DocsEnum.NO_MORE_DOCS) {
                break;
              }
              retArray[docID] = termval;
            }
          }
        } catch (StopFillCacheException stop) {
        }
      }
      if (retArray == null) {
        // no values
        retArray = new long[reader.maxDoc()];
      }
      return retArray;
    }
  }

  // inherit javadocs
  public double[] getDoubles(IndexReader reader, String field) throws IOException {
    return getDoubles(reader, field, null);
  }

  // inherit javadocs
  public double[] getDoubles(IndexReader reader, String field, FieldCache.DoubleParser parser) throws IOException {
    return (double[]) caches.get(Double.TYPE).get(reader, new Entry(field, parser));
  }

  static final class DoubleCache extends Cache {
    DoubleCache(FieldCache wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(IndexReader reader, Entry entryKey) throws IOException {
      Entry entry = entryKey;
      String field = entry.field;
      FieldCache.DoubleParser parser = (FieldCache.DoubleParser) entry.custom;
      if (parser == null) {
        try {
          return wrapper.getDoubles(reader, field, DEFAULT_DOUBLE_PARSER);
        } catch (NumberFormatException ne) {
          return wrapper.getDoubles(reader, field, NUMERIC_UTILS_DOUBLE_PARSER);
        }
      }
      double[] retArray = null;
      Terms terms = MultiFields.getTerms(reader, field);
      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator();
        final Bits delDocs = MultiFields.getDeletedDocs(reader);
        DocsEnum docs = null;
        try {
          while(true) {
            final BytesRef term = termsEnum.next();
            if (term == null) {
              break;
            }
            final double termval = parser.parseDouble(term);
            if (retArray == null) {
              // late init so numeric fields don't double allocate
              retArray = new double[reader.maxDoc()];
            }
            docs = termsEnum.docs(delDocs, docs);
            while (true) {
              final int docID = docs.nextDoc();
              if (docID == DocsEnum.NO_MORE_DOCS) {
                break;
              }
              retArray[docID] = termval;
            }
          }
        } catch (StopFillCacheException stop) {
        }
      }
      if (retArray == null) {
        // no values
        retArray = new double[reader.maxDoc()];
      }
      return retArray;
    }
  }

  public static class DocTermsIndexImpl extends DocTermsIndex {
    private final PagedBytes.Reader bytes;
    private final PackedInts.Reader termOrdToBytesOffset;
    private final PackedInts.Reader docToTermOrd;
    private final int numOrd;

    public DocTermsIndexImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
      this.bytes = bytes;
      this.docToTermOrd = docToTermOrd;
      this.termOrdToBytesOffset = termOrdToBytesOffset;
      this.numOrd = numOrd;
    }

    @Override
    public PackedInts.Reader getDocToOrd() {
      return docToTermOrd;
    }

    @Override
    public int numOrd() {
      return numOrd;
    }

    @Override
    public int getOrd(int docID) {
      return (int) docToTermOrd.get(docID);
    }

    @Override
    public int size() {
      return docToTermOrd.size();
    }

    @Override
    public BytesRef lookup(int ord, BytesRef ret) {
      return bytes.fillUsingLengthPrefix(ret, termOrdToBytesOffset.get(ord));
    }

    @Override
    public TermsEnum getTermsEnum() {
      return this.new DocTermsIndexEnum();
    }
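
    // Usage sketch (illustrative; "reader", "country" and docID are
    // hypothetical, not part of the original source):
    //
    //   FieldCache.DocTermsIndex idx =
    //       FieldCache.DEFAULT.getTermsIndex(reader, "country");
    //   BytesRef scratch = new BytesRef();
    //   int ord = idx.getOrd(docID);   // ord 0 means the doc had no term
    //   if (ord != 0) {
    //     BytesRef value = idx.lookup(ord, scratch);
    //   }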
    class DocTermsIndexEnum extends TermsEnum {
      int currentOrd;
      int currentBlockNumber;
      int end;  // end position in the current block
      final byte[][] blocks;
      final int[] blockEnds;

      final BytesRef term = new BytesRef();

      public DocTermsIndexEnum() {
        currentOrd = 0;
        currentBlockNumber = 0;
        blocks = bytes.getBlocks();
        blockEnds = bytes.getBlockEnds();
        currentBlockNumber = bytes.fillUsingLengthPrefix2(term, termOrdToBytesOffset.get(0));
        end = blockEnds[currentBlockNumber];
      }

      @Override
      public SeekStatus seek(BytesRef text, boolean useCache) throws IOException {
        // TODO - we can support with binary search
        throw new UnsupportedOperationException();
      }

      @Override
      public SeekStatus seek(long ord) throws IOException {
        assert(ord >= 0 && ord <= numOrd);
        // TODO: if gap is small, could iterate from current position?  Or let user decide that?
        currentBlockNumber = bytes.fillUsingLengthPrefix2(term, termOrdToBytesOffset.get((int)ord));
        end = blockEnds[currentBlockNumber];
        currentOrd = (int)ord;
        return SeekStatus.FOUND;
      }

      @Override
      public BytesRef next() throws IOException {
        int start = term.offset + term.length;
        if (start >= end) {
          // switch byte blocks
          if (currentBlockNumber+1 >= blocks.length) {
            return null;
          }
          currentBlockNumber++;
          term.bytes = blocks[currentBlockNumber];
          end = blockEnds[currentBlockNumber];
          start = 0;
          if (end <= 0) return null;  // special case of empty last array
        }

        currentOrd++;

        byte[] block = term.bytes;
        // each term is stored with a 1- or 2-byte length prefix: high bit
        // clear means a single-byte length; high bit set means the length
        // spans two bytes (comment added for clarity)
        if ((block[start] & 128) == 0) {
          term.length = block[start];
          term.offset = start+1;
        } else {
          term.length = (((int) (block[start] & 0x7f)) << 8) | (block[1+start] & 0xff);
          term.offset = start+2;
        }

        return term;
      }

      @Override
      public BytesRef term() throws IOException {
        return term;
      }

      @Override
      public long ord() throws IOException {
        return currentOrd;
      }

      @Override
      public int docFreq() {
        throw new UnsupportedOperationException();
      }

      @Override
      public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
        throw new UnsupportedOperationException();
      }

      @Override
      public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
        throw new UnsupportedOperationException();
      }

      @Override
      public Comparator<BytesRef> getComparator() throws IOException {
        throw new UnsupportedOperationException();
      }
    }
  }

  private static boolean DEFAULT_FASTER_BUT_MORE_RAM = true;

  public DocTermsIndex getTermsIndex(IndexReader reader, String field) throws IOException {
    return getTermsIndex(reader, field, DEFAULT_FASTER_BUT_MORE_RAM);
  }

  public DocTermsIndex getTermsIndex(IndexReader reader, String field, boolean fasterButMoreRAM) throws IOException {
    return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, Boolean.valueOf(fasterButMoreRAM)));
  }
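
  // Note (commentary added for clarity; not in the original source): the
  // cache below uninverts a field into three parallel structures: the
  // concatenated term bytes (length-prefixed in PagedBytes), a packed
  // termOrd -> bytes-offset map, and a packed docID -> termOrd map.
  // Comparing docs by ord is then a cheap int comparison, which is what
  // makes sorting on a cached string field fast.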
  static class DocTermsIndexCache extends Cache {
    DocTermsIndexCache(FieldCache wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(IndexReader reader, Entry entryKey) throws IOException {
      String field = StringHelper.intern(entryKey.field);
      Terms terms = MultiFields.getTerms(reader, field);

      final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue();

      final PagedBytes bytes = new PagedBytes(15);

      int startBytesBPV;
      int startTermsBPV;
      int startNumUniqueTerms;

      int maxDoc = reader.maxDoc();
      final int termCountHardLimit;
      if (maxDoc == Integer.MAX_VALUE) {
        termCountHardLimit = Integer.MAX_VALUE;
      } else {
        termCountHardLimit = maxDoc+1;
      }

      if (terms != null) {
        // Try for coarse estimate for number of bits; this
        // should be an underestimate most of the time, which
        // is fine -- GrowableWriter will reallocate as needed
        long numUniqueTerms = 0;
        try {
          numUniqueTerms = terms.getUniqueTermCount();
        } catch (UnsupportedOperationException uoe) {
          numUniqueTerms = -1;
        }
        if (numUniqueTerms != -1) {
          if (numUniqueTerms > termCountHardLimit) {
            // app is misusing the API (there is more than
            // one term per doc); in this case we make best
            // effort to load what we can (see LUCENE-2142)
            numUniqueTerms = termCountHardLimit;
          }
          startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
          startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
          startNumUniqueTerms = (int) numUniqueTerms;
        } else {
          startBytesBPV = 1;
          startTermsBPV = 1;
          startNumUniqueTerms = 1;
        }
      } else {
        startBytesBPV = 1;
        startTermsBPV = 1;
        startNumUniqueTerms = 1;
      }

      GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, fasterButMoreRAM);
      final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, reader.maxDoc(), fasterButMoreRAM);

      // 0 is reserved for "unset"
      bytes.copyUsingLengthPrefix(new BytesRef());
      int termOrd = 1;

      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator();
        final Bits delDocs = MultiFields.getDeletedDocs(reader);
        DocsEnum docs = null;

        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          if (termOrd >= termCountHardLimit) {
            break;
          }

          if (termOrd == termOrdToBytesOffset.size()) {
            // NOTE: this code only runs if the incoming
            // reader impl doesn't implement
            // getUniqueTermCount (which should be uncommon)
            termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
          }
          termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
          docs = termsEnum.docs(delDocs, docs);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocsEnum.NO_MORE_DOCS) {
              break;
            }
            docToTermOrd.set(docID, termOrd);
          }
          termOrd++;
        }

        if (termOrdToBytesOffset.size() > termOrd) {
          termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd);
        }
      }

      // maybe an int-only impl?
      return new DocTermsIndexImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd);
    }
  }

  private static class DocTermsImpl extends DocTerms {
    private final PagedBytes.Reader bytes;
    private final PackedInts.Reader docToOffset;

    public DocTermsImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) {
      this.bytes = bytes;
      this.docToOffset = docToOffset;
    }

    @Override
    public int size() {
      return docToOffset.size();
    }

    @Override
    public boolean exists(int docID) {
      // offset 0 is reserved for "not set", so a doc has a value only
      // when its offset is non-zero
      return docToOffset.get(docID) != 0;
    }

    @Override
    public BytesRef getTerm(int docID, BytesRef ret) {
      final int pointer = (int) docToOffset.get(docID);
      return bytes.fillUsingLengthPrefix(ret, pointer);
    }
  }

  // TODO: if a DocTermsIndex was already created for this
  // field, we should share it...
  public DocTerms getTerms(IndexReader reader, String field) throws IOException {
    return getTerms(reader, field, DEFAULT_FASTER_BUT_MORE_RAM);
  }

  public DocTerms getTerms(IndexReader reader, String field, boolean fasterButMoreRAM) throws IOException {
    return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field, Boolean.valueOf(fasterButMoreRAM)));
  }
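
  // Usage sketch (illustrative; names are hypothetical, not part of the
  // original source): DocTerms gives direct docID -> term access without
  // the ord indirection:
  //
  //   FieldCache.DocTerms ids = FieldCache.DEFAULT.getTerms(reader, "id");
  //   BytesRef scratch = new BytesRef();
  //   BytesRef value = ids.getTerm(docID, scratch);  // empty if unset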
  static final class DocTermsCache extends Cache {
    DocTermsCache(FieldCache wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(IndexReader reader, Entry entryKey) throws IOException {
      String field = StringHelper.intern(entryKey.field);
      Terms terms = MultiFields.getTerms(reader, field);

      final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue();

      final int termCountHardLimit = reader.maxDoc();

      // Holds the actual term data, expanded.
      final PagedBytes bytes = new PagedBytes(15);

      int startBPV;

      if (terms != null) {
        // Try for coarse estimate for number of bits; this
        // should be an underestimate most of the time, which
        // is fine -- GrowableWriter will reallocate as needed
        long numUniqueTerms = 0;
        try {
          numUniqueTerms = terms.getUniqueTermCount();
        } catch (UnsupportedOperationException uoe) {
          numUniqueTerms = -1;
        }
        if (numUniqueTerms != -1) {
          if (numUniqueTerms > termCountHardLimit) {
            numUniqueTerms = termCountHardLimit;
          }
          startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
        } else {
          startBPV = 1;
        }
      } else {
        startBPV = 1;
      }

      final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), fasterButMoreRAM);

      // pointer==0 means not set
      bytes.copyUsingLengthPrefix(new BytesRef());

      if (terms != null) {
        int termCount = 0;
        final TermsEnum termsEnum = terms.iterator();
        final Bits delDocs = MultiFields.getDeletedDocs(reader);
        DocsEnum docs = null;
        while(true) {
          if (termCount++ == termCountHardLimit) {
            // app is misusing the API (there is more than
            // one term per doc); in this case we make best
            // effort to load what we can (see LUCENE-2142)
            break;
          }

          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          final long pointer = bytes.copyUsingLengthPrefix(term);
          docs = termsEnum.docs(delDocs, docs);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocsEnum.NO_MORE_DOCS) {
              break;
            }
            docToOffset.set(docID, pointer);
          }
        }
      }

      // maybe an int-only impl?
      return new DocTermsImpl(bytes.freeze(true), docToOffset.getMutable());
    }
  }

  private volatile PrintStream infoStream;

  public void setInfoStream(PrintStream stream) {
    infoStream = stream;
  }

  public PrintStream getInfoStream() {
    return infoStream;
  }
}