package org.apache.lucene.search; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.WeakHashMap; import org.apache.lucene.index.DocTermOrds; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.OrdTermState; import org.apache.lucene.index.SegmentReader; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FieldCacheSanityChecker; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.packed.GrowableWriter; import org.apache.lucene.util.packed.PackedInts; /** * Expert: The default cache implementation, storing all values in memory. * A WeakHashMap is used for storage. 
*
* @since lucene 1.4
*/
class FieldCacheImpl implements FieldCache {

  // One Cache instance per value type, keyed by the class that identifies that type.
  private Map<Class<?>,Cache> caches;

  FieldCacheImpl() {
    init();
  }

  // (Re)builds the map of per-type caches from scratch; any previously cached
  // values become unreachable through this object.
  private synchronized void init() {
    caches = new HashMap<Class<?>,Cache>(9);
    caches.put(Byte.TYPE, new ByteCache(this));
    caches.put(Short.TYPE, new ShortCache(this));
    caches.put(Integer.TYPE, new IntCache(this));
    caches.put(Float.TYPE, new FloatCache(this));
    caches.put(Long.TYPE, new LongCache(this));
    caches.put(Double.TYPE, new DoubleCache(this));
    caches.put(DocTerms.class, new DocTermsCache(this));
    caches.put(DocTermsIndex.class, new DocTermsIndexCache(this));
    caches.put(DocTermOrds.class, new DocTermOrdsCache(this));
    caches.put(DocsWithFieldCache.class, new DocsWithFieldCache(this));
  }

  // Drops every cached value by replacing all per-type caches with fresh ones.
  public synchronized void purgeAllCaches() {
    init();
  }

  // Removes the given reader's entries from every per-type cache.
  public synchronized void purge(AtomicReader r) {
    for(Cache c : caches.values()) {
      c.purge(r);
    }
  }

  // Returns a snapshot with one CacheEntry per (reader key, field, custom) value
  // currently held in any per-type cache.
  public synchronized CacheEntry[] getCacheEntries() {
    List<CacheEntry> result = new ArrayList<CacheEntry>(17);
    for(final Map.Entry<Class<?>,Cache> cacheEntry: caches.entrySet()) {
      final Cache cache = cacheEntry.getValue();
      final Class<?> cacheType = cacheEntry.getKey();
      // readerCache is guarded by its own monitor (see Cache.get/put/purge).
      synchronized(cache.readerCache) {
        for (final Map.Entry<Object,Map<Entry, Object>> readerCacheEntry : cache.readerCache.entrySet()) {
          final Object readerKey = readerCacheEntry.getKey();
          // NOTE(review): presumably a null key is a weak reference that was
          // already cleared -- confirm against WeakHashMap semantics.
          if (readerKey == null) continue;
          final Map<Entry, Object> innerCache = readerCacheEntry.getValue();
          for (final Map.Entry<Entry, Object> mapEntry : innerCache.entrySet()) {
            Entry entry = mapEntry.getKey();
            result.add(new CacheEntryImpl(readerKey, entry.field,
                                          cacheType, entry.custom,
                                          mapEntry.getValue()));
          }
        }
      }
    }
    return result.toArray(new CacheEntry[result.size()]);
  }

  // Immutable CacheEntry that simply carries the five values it was built with.
  private static final class CacheEntryImpl extends CacheEntry {
    private final Object readerKey;
    private final String fieldName;
    private final Class<?> cacheType;
    private final Object custom;
    private final Object value;
    CacheEntryImpl(Object readerKey, String fieldName,
                   Class<?> cacheType,
                   Object custom,
                   Object value) {
        this.readerKey = readerKey;
        this.fieldName = fieldName;
        this.cacheType = cacheType;
        this.custom = custom;
        this.value = value;

        // :HACK: for testing.
        //         if (null != locale || SortField.CUSTOM != sortFieldType) {
        //           throw new RuntimeException("Locale/sortFieldType: " + this);
        //         }

    }
    @Override
    public Object getReaderKey() { return readerKey; }
    @Override
    public String getFieldName() { return fieldName; }
    @Override
    public Class<?> getCacheType() { return cacheType; }
    @Override
    public Object getCustom() { return custom; }
    @Override
    public Object getValue() { return value; }
  }

  /**
   * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops
   * processing terms and returns the current FieldCache
   * array.
   */
  static final class StopFillCacheException extends RuntimeException {
  }

  // per-segment fieldcaches don't purge until the shared core closes.
  final SegmentReader.CoreClosedListener purgeCore = new SegmentReader.CoreClosedListener() {
    @Override
    public void onClose(SegmentReader owner) {
      FieldCacheImpl.this.purge(owner);
    }
  };

  // composite/SlowMultiReaderWrapper fieldcaches don't purge until composite reader is closed.
  final IndexReader.ReaderClosedListener purgeReader = new IndexReader.ReaderClosedListener() {
    @Override
    public void onClose(IndexReader owner) {
      assert owner instanceof AtomicReader;
      FieldCacheImpl.this.purge((AtomicReader) owner);
    }
  };

  // Registers the appropriate purge listener for a reader; called from Cache.get/put
  // the first time a reader key is added to a cache.
  private void initReader(AtomicReader reader) {
    if (reader instanceof SegmentReader) {
      ((SegmentReader) reader).addCoreClosedListener(purgeCore);
    } else {
      // we have a slow reader of some sort, try to register a purge event
      // rather than relying on gc:
      Object key = reader.getCoreCacheKey();
      if (key instanceof AtomicReader) {
        ((AtomicReader)key).addReaderClosedListener(purgeReader);
      } else {
        // last chance
        reader.addReaderClosedListener(purgeReader);
      }
    }
  }

  /** Expert: Internal cache. */
  abstract static class Cache {
    Cache(FieldCacheImpl wrapper) {
      this.wrapper = wrapper;
    }

    // Owning FieldCacheImpl; used for initReader, getInfoStream and sanity checks.
    final FieldCacheImpl wrapper;

    // reader core-cache key -> (Entry -> cached value or CreationPlaceholder).
    // All access in this file happens while holding this map's monitor.
    final Map<Object,Map<Entry,Object>> readerCache = new WeakHashMap<Object,Map<Entry,Object>>();

    // Builds the value for (reader, key); invoked by get() when nothing is cached yet.
    protected abstract Object createValue(AtomicReader reader, Entry key, boolean setDocsWithField)
        throws IOException;

    /** Remove this reader from the cache, if present. */
    public void purge(AtomicReader r) {
      Object readerKey = r.getCoreCacheKey();
      synchronized(readerCache) {
        readerCache.remove(readerKey);
      }
    }

    /** Sets the key to the value for the provided reader;
     *  if the key is already set then this doesn't change it. */
    public void put(AtomicReader reader, Entry key, Object value) {
      final Object readerKey = reader.getCoreCacheKey();
      synchronized (readerCache) {
        Map<Entry,Object> innerCache = readerCache.get(readerKey);
        if (innerCache == null) {
          // First time this reader is using FieldCache
          innerCache = new HashMap<Entry,Object>();
          readerCache.put(readerKey, innerCache);
          wrapper.initReader(reader);
        }
        if (innerCache.get(key) == null) {
          innerCache.put(key, value);
        } else {
          // Another thread beat us to it; leave the current
          // value
        }
      }
    }

    // Returns the cached value for (reader, key), creating it via createValue()
    // if it is not present yet.
    public Object get(AtomicReader reader, Entry key, boolean setDocsWithField) throws IOException {
      Map<Entry,Object> innerCache;
      Object value;
      final Object readerKey = reader.getCoreCacheKey();
      // Step 1 (under the readerCache lock): find or create the reader's inner map
      // and, if no value is stored for key, store a CreationPlaceholder.
      synchronized (readerCache) {
        innerCache = readerCache.get(readerKey);
        if (innerCache == null) {
          // First time this reader is using FieldCache
          innerCache = new HashMap<Entry,Object>();
          readerCache.put(readerKey, innerCache);
          wrapper.initReader(reader);
          value = null;
        } else {
          value = innerCache.get(key);
        }
        if (value == null) {
          value = new CreationPlaceholder();
          innerCache.put(key, value);
        }
      }
      // Step 2 (outside the readerCache lock): if we hold a placeholder, lock on it
      // so createValue() runs only while progress.value is still null.
      if (value instanceof CreationPlaceholder) {
        synchronized (value) {
          CreationPlaceholder progress = (CreationPlaceholder) value;
          if (progress.value == null) {
            progress.value = createValue(reader, key, setDocsWithField);
            // Replace the placeholder with the real value in the inner map.
            synchronized (readerCache) {
              innerCache.put(key, progress.value);
            }

            // Only check if key.custom (the parser) is
            // non-null; else, we check twice for a single
            // call to FieldCache.getXXX
            if (key.custom != null && wrapper != null) {
              final PrintStream infoStream = wrapper.getInfoStream();
              if (infoStream != null) {
                printNewInsanity(infoStream, progress.value);
              }
            }
          }
          return progress.value;
        }
      }
      return value;
    }

    // Runs the sanity checker over the whole FieldCacheImpl and prints a warning,
    // with a stack trace, for each insanity that involves the given value.
    private void printNewInsanity(PrintStream infoStream, Object value) {
      final FieldCacheSanityChecker.Insanity[] insanities = FieldCacheSanityChecker.checkSanity(wrapper);
      for(int i=0;i<insanities.length;i++) {
        final FieldCacheSanityChecker.Insanity insanity = insanities[i];
        final CacheEntry[] entries = insanity.getCacheEntries();
        for(int j=0;j<entries.length;j++) {
          // Identity comparison: we are looking for this exact cached object.
          if (entries[j].getValue() == value) {
            // OK this insanity involves our entry
            infoStream.println("WARNING: new FieldCache insanity created\nDetails: " + insanity.toString());
            infoStream.println("\nStack:\n");
            new Throwable().printStackTrace(infoStream);
            break;
          }
        }
      }
    }
  }

  /** Expert: Every composite-key in the internal cache is of this type. */
  static class Entry {
    final String field;        // which Field
    final Object custom;       // which custom comparator or parser

    /** Creates one of these objects for a custom comparator/parser. */
    Entry (String field, Object custom) {
      this.field = field;
      this.custom = custom;
    }

    /** Two of these are equal iff they reference the same field and type. */
    @Override
    public boolean equals (Object o) {
      if (o instanceof Entry) {
        Entry other = (Entry) o;
        if (other.field.equals(field)) {
          // custom may be null on either side; two nulls count as equal.
          if (other.custom == null) {
            if (custom == null) return true;
          } else if (other.custom.equals (custom)) {
            return true;
          }
        }
      }
      return false;
    }

    /** Composes a hashcode based on the field and type. */
    @Override
    public int hashCode() {
      return field.hashCode() ^ (custom==null ? 0 : custom.hashCode());
    }
  }

  // inherit javadocs
  public byte[] getBytes (AtomicReader reader, String field, boolean setDocsWithField) throws IOException {
    // A null parser makes ByteCache.createValue fall back to DEFAULT_BYTE_PARSER.
    return getBytes(reader, field, null, setDocsWithField);
  }

  // inherit javadocs
  public byte[] getBytes(AtomicReader reader, String field, ByteParser parser, boolean setDocsWithField)
      throws IOException {
    return (byte[]) caches.get(Byte.TYPE).get(reader, new Entry(field, parser), setDocsWithField);
  }

  // Cache that builds a byte[] with one slot per document (length reader.maxDoc()).
  static final class ByteCache extends Cache {
    ByteCache(FieldCacheImpl wrapper) {
      super(wrapper);
    }
    @Override
    protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField)
        throws IOException {
      String field = entryKey.field;
      ByteParser parser = (ByteParser) entryKey.custom;
      if (parser == null) {
        // No parser given: go back through the cache with the default parser.
        return wrapper.getBytes(reader, field, FieldCache.DEFAULT_BYTE_PARSER, setDocsWithField);
      }
      final int maxDoc = reader.maxDoc();
      final byte[] retArray = new byte[maxDoc];
      Terms terms = reader.terms(field);
      FixedBitSet docsWithField = null;
      if (terms != null) {
        if (setDocsWithField) {
          final int termsDocCount = terms.getDocCount();
          assert termsDocCount <= maxDoc;
          if (termsDocCount == maxDoc) {
            // Fast case: all docs have this field:
            wrapper.setDocsWithField(reader, field, new Bits.MatchAllBits(maxDoc));
            // Already recorded; skip per-doc tracking below.
            setDocsWithField = false;
          }
        }
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;
        try {
          // For every term, parse it once and store the value for each doc that has it.
          while(true) {
            final BytesRef term = termsEnum.next();
            if (term == null) {
              break;
            }
            final byte termval = parser.parseByte(term);
            docs = termsEnum.docs(null, docs, 0);
            while (true) {
              final int docID = docs.nextDoc();
              if (docID == DocIdSetIterator.NO_MORE_DOCS) {
                break;
              }
              retArray[docID] = termval;
              if (setDocsWithField) {
                if (docsWithField == null) {
                  // Lazy init
                  docsWithField = new FixedBitSet(maxDoc);
                }
                docsWithField.set(docID);
              }
            }
          }
        } catch (FieldCache.StopFillCacheException stop) {
          // Deliberately ignored: the exception only ends the fill loop early;
          // whatever was filled so far is returned.
        }
      }
      if (setDocsWithField) {
        // docsWithField is still null here when no document had a value.
        wrapper.setDocsWithField(reader, field, docsWithField);
      }
      return retArray;
    }
  }

  // inherit
javadocs public short[] getShorts (AtomicReader reader, String field, boolean setDocsWithField) throws IOException { return getShorts(reader, field, null, setDocsWithField); } // inherit javadocs public short[] getShorts(AtomicReader reader, String field, ShortParser parser, boolean setDocsWithField) throws IOException { return (short[]) caches.get(Short.TYPE).get(reader, new Entry(field, parser), setDocsWithField); } static final class ShortCache extends Cache { ShortCache(FieldCacheImpl wrapper) { super(wrapper); } @Override protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField) throws IOException { String field = entryKey.field; ShortParser parser = (ShortParser) entryKey.custom; if (parser == null) { return wrapper.getShorts(reader, field, FieldCache.DEFAULT_SHORT_PARSER, setDocsWithField); } final int maxDoc = reader.maxDoc(); final short[] retArray = new short[maxDoc]; Terms terms = reader.terms(field); FixedBitSet docsWithField = null; if (terms != null) { if (setDocsWithField) { final int termsDocCount = terms.getDocCount(); assert termsDocCount <= maxDoc; if (termsDocCount == maxDoc) { // Fast case: all docs have this field: wrapper.setDocsWithField(reader, field, new Bits.MatchAllBits(maxDoc)); setDocsWithField = false; } } final TermsEnum termsEnum = terms.iterator(null); DocsEnum docs = null; try { while(true) { final BytesRef term = termsEnum.next(); if (term == null) { break; } final short termval = parser.parseShort(term); docs = termsEnum.docs(null, docs, 0); while (true) { final int docID = docs.nextDoc(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } retArray[docID] = termval; if (setDocsWithField) { if (docsWithField == null) { // Lazy init docsWithField = new FixedBitSet(maxDoc); } docsWithField.set(docID); } } } } catch (FieldCache.StopFillCacheException stop) { } } if (setDocsWithField) { wrapper.setDocsWithField(reader, field, docsWithField); } return retArray; } } // null Bits means no docs 
// matched -- i.e. passing a null docsWithField stores a Bits.MatchNoBits.
  // Stores the "docs with field" bits for (reader, field) in the DocsWithFieldCache,
  // collapsing a full FixedBitSet to Bits.MatchAllBits. Cache.put() does not
  // overwrite an existing entry.
  void setDocsWithField(AtomicReader reader, String field, Bits docsWithField) {
    final int maxDoc = reader.maxDoc();
    final Bits bits;
    if (docsWithField == null) {
      bits = new Bits.MatchNoBits(maxDoc);
    } else if (docsWithField instanceof FixedBitSet) {
      final int numSet = ((FixedBitSet) docsWithField).cardinality();
      if (numSet >= maxDoc) {
        // The cardinality of the BitSet is maxDoc if all documents have a value.
        assert numSet == maxDoc;
        bits = new Bits.MatchAllBits(maxDoc);
      } else {
        bits = docsWithField;
      }
    } else {
      bits = docsWithField;
    }
    caches.get(DocsWithFieldCache.class).put(reader, new Entry(field, null), bits);
  }

  // inherit javadocs
  public int[] getInts (AtomicReader reader, String field, boolean setDocsWithField) throws IOException {
    // A null parser lets IntCache.createValue pick the parser itself.
    return getInts(reader, field, null, setDocsWithField);
  }

  // inherit javadocs
  public int[] getInts(AtomicReader reader, String field, IntParser parser, boolean setDocsWithField)
      throws IOException {
    return (int[]) caches.get(Integer.TYPE).get(reader, new Entry(field, parser), setDocsWithField);
  }

  // Cache that builds an int[] with one slot per document (length reader.maxDoc()).
  static final class IntCache extends Cache {
    IntCache(FieldCacheImpl wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField)
        throws IOException {
      String field = entryKey.field;
      IntParser parser = (IntParser) entryKey.custom;
      if (parser == null) {
        // No parser given: try the default parser first and, if it throws
        // NumberFormatException, retry with the NumericUtils-based parser.
        try {
          return wrapper.getInts(reader, field, DEFAULT_INT_PARSER, setDocsWithField);
        } catch (NumberFormatException ne) {
          return wrapper.getInts(reader, field, NUMERIC_UTILS_INT_PARSER, setDocsWithField);
        }
      }
      final int maxDoc = reader.maxDoc();
      int[] retArray = null;

      Terms terms = reader.terms(field);
      FixedBitSet docsWithField = null;
      if (terms != null) {
        if (setDocsWithField) {
          final int termsDocCount = terms.getDocCount();
          assert termsDocCount <= maxDoc;
          if (termsDocCount == maxDoc) {
            // Fast case: all docs have this field:
            wrapper.setDocsWithField(reader, field, new Bits.MatchAllBits(maxDoc));
            setDocsWithField = false;
          }
        }
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;
        try {
          while(true) {
            final BytesRef term = termsEnum.next();
            if (term == null) {
              break;
            }
            final int termval = parser.parseInt(term);
            if (retArray == null) {
              // late init so numeric fields don't double allocate
              retArray = new int[maxDoc];
            }

            docs = termsEnum.docs(null, docs, 0);
            while (true) {
              final int docID = docs.nextDoc();
              if (docID == DocIdSetIterator.NO_MORE_DOCS) {
                break;
              }
              retArray[docID] = termval;
              if (setDocsWithField) {
                if (docsWithField == null) {
                  // Lazy init
                  docsWithField = new FixedBitSet(maxDoc);
                }
                docsWithField.set(docID);
              }
            }
          }
        } catch (FieldCache.StopFillCacheException stop) {
          // Deliberately ignored: only ends the fill loop early.
        }
      }

      if (retArray == null) {
        // no values
        retArray = new int[maxDoc];
      }
      if (setDocsWithField) {
        wrapper.setDocsWithField(reader, field, docsWithField);
      }
      return retArray;
    }
  }

  // Returns the Bits telling which documents have a value for the field,
  // computing them through DocsWithFieldCache if not cached yet.
  public Bits getDocsWithField(AtomicReader reader, String field)
      throws IOException {
    return (Bits) caches.get(DocsWithFieldCache.class).get(reader, new Entry(field, null), false);
  }

  // Cache of per-field Bits: MatchAllBits, MatchNoBits or a FixedBitSet.
  static final class DocsWithFieldCache extends Cache {
    DocsWithFieldCache(FieldCacheImpl wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField /* ignored */)
    throws IOException {
      final String field = entryKey.field;
      FixedBitSet res = null;
      Terms terms = reader.terms(field);
      final int maxDoc = reader.maxDoc();
      if (terms != null) {
        final int termsDocCount = terms.getDocCount();
        assert termsDocCount <= maxDoc;
        if (termsDocCount == maxDoc) {
          // Fast case: all docs have this field:
          return new Bits.MatchAllBits(maxDoc);
        }
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;
        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          if (res == null) {
            // lazy init
            res = new FixedBitSet(maxDoc);
          }

          docs = termsEnum.docs(null, docs, 0);
          // TODO: use bulk API
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
            res.set(docID);
          }
        }
      }
      if (res == null) {
        // No terms at all (or no Terms for the field): nothing matches.
        return new Bits.MatchNoBits(maxDoc);
      }
      final int numSet = res.cardinality();
      if (numSet >= maxDoc) {
        // The cardinality of the BitSet is maxDoc if all documents have a value.
        assert numSet == maxDoc;
        return new Bits.MatchAllBits(maxDoc);
      }
      return res;
    }
  }

  // inherit javadocs
  public float[] getFloats (AtomicReader reader, String field, boolean setDocsWithField)
    throws IOException {
    // A null parser lets FloatCache.createValue pick the parser itself.
    return getFloats(reader, field, null, setDocsWithField);
  }

  // inherit javadocs
  public float[] getFloats(AtomicReader reader, String field, FloatParser parser, boolean setDocsWithField)
    throws IOException {

    return (float[]) caches.get(Float.TYPE).get(reader, new Entry(field, parser), setDocsWithField);
  }

  // Cache that builds a float[] with one slot per document (length reader.maxDoc()).
  static final class FloatCache extends Cache {
    FloatCache(FieldCacheImpl wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField)
    throws IOException {
      String field = entryKey.field;
      FloatParser parser = (FloatParser) entryKey.custom;
      if (parser == null) {
        // Default parser first; NumericUtils-based parser on NumberFormatException.
        try {
          return wrapper.getFloats(reader, field, DEFAULT_FLOAT_PARSER, setDocsWithField);
        } catch (NumberFormatException ne) {
          return wrapper.getFloats(reader, field, NUMERIC_UTILS_FLOAT_PARSER, setDocsWithField);
        }
      }
      final int maxDoc = reader.maxDoc();
      float[] retArray = null;

      Terms terms = reader.terms(field);
      FixedBitSet docsWithField = null;
      if (terms != null) {
        if (setDocsWithField) {
          final int termsDocCount = terms.getDocCount();
          assert termsDocCount <= maxDoc;
          if (termsDocCount == maxDoc) {
            // Fast case: all docs have this field:
            wrapper.setDocsWithField(reader, field, new Bits.MatchAllBits(maxDoc));
            setDocsWithField = false;
          }
        }
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;
        try {
          while(true) {
            final BytesRef term = termsEnum.next();
            if (term == null) {
              break;
            }
            final float termval = parser.parseFloat(term);
            if (retArray == null) {
              // late init so numeric fields don't double allocate
              retArray = new float[maxDoc];
            }

            docs = termsEnum.docs(null, docs, 0);
            while (true) {
              final int docID = docs.nextDoc();
              if (docID == DocIdSetIterator.NO_MORE_DOCS) {
                break;
              }
              retArray[docID] = termval;
              if (setDocsWithField) {
                if (docsWithField == null) {
                  // Lazy init
                  docsWithField = new FixedBitSet(maxDoc);
                }
                docsWithField.set(docID);
              }
            }
          }
        } catch (FieldCache.StopFillCacheException stop) {
          // Deliberately ignored: only ends the fill loop early.
        }
      }

      if (retArray == null) {
        // no values
        retArray = new float[maxDoc];
      }
      if (setDocsWithField) {
        wrapper.setDocsWithField(reader, field, docsWithField);
      }
      return retArray;
    }
  }

  // Convenience overload: a null parser lets LongCache.createValue pick the parser.
  public long[] getLongs(AtomicReader reader, String field, boolean setDocsWithField) throws IOException {
    return getLongs(reader, field, null, setDocsWithField);
  }

  // inherit javadocs
  public long[] getLongs(AtomicReader reader, String field, FieldCache.LongParser parser, boolean setDocsWithField)
      throws IOException {
    return (long[]) caches.get(Long.TYPE).get(reader, new Entry(field, parser), setDocsWithField);
  }

  // Cache that builds a long[] with one slot per document (length reader.maxDoc()).
  static final class LongCache extends Cache {
    LongCache(FieldCacheImpl wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField)
        throws IOException {
      String field = entryKey.field;
      FieldCache.LongParser parser = (FieldCache.LongParser) entryKey.custom;
      if (parser == null) {
        // Default parser first; NumericUtils-based parser on NumberFormatException.
        try {
          return wrapper.getLongs(reader, field, DEFAULT_LONG_PARSER, setDocsWithField);
        } catch (NumberFormatException ne) {
          return wrapper.getLongs(reader, field, NUMERIC_UTILS_LONG_PARSER, setDocsWithField);
        }
      }
      final int maxDoc = reader.maxDoc();
      long[] retArray = null;

      Terms terms = reader.terms(field);
      FixedBitSet docsWithField = null;
      if (terms != null) {
        if (setDocsWithField) {
          final int termsDocCount = terms.getDocCount();
          assert termsDocCount <= maxDoc;
          if (termsDocCount == maxDoc) {
            // Fast case: all docs have this field:
            wrapper.setDocsWithField(reader, field, new Bits.MatchAllBits(maxDoc));
            setDocsWithField = false;
          }
        }
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;
        try {
          while(true) {
            final BytesRef term = termsEnum.next();
            if (term == null) {
              break;
            }
            final long termval = parser.parseLong(term);
            if (retArray == null) {
              // late init so numeric fields don't double allocate
              retArray = new long[maxDoc];
            }

            docs = termsEnum.docs(null, docs, 0);
            while (true) {
              final int docID = docs.nextDoc();
              if (docID == DocIdSetIterator.NO_MORE_DOCS) {
                break;
              }
              retArray[docID] = termval;
              if (setDocsWithField) {
                if (docsWithField == null) {
                  // Lazy init
                  docsWithField = new FixedBitSet(maxDoc);
                }
                docsWithField.set(docID);
              }
            }
          }
        } catch (FieldCache.StopFillCacheException stop) {
          // Deliberately ignored: only ends the fill loop early.
        }
      }

      if (retArray == null) {
        // no values
        retArray = new long[maxDoc];
      }
      if (setDocsWithField) {
        wrapper.setDocsWithField(reader, field, docsWithField);
      }
      return retArray;
    }
  }

  // inherit javadocs
  public double[] getDoubles(AtomicReader reader, String field, boolean setDocsWithField)
    throws IOException {
    // A null parser lets DoubleCache.createValue pick the parser itself.
    return getDoubles(reader, field, null, setDocsWithField);
  }

  // inherit javadocs
  public double[] getDoubles(AtomicReader reader, String field, FieldCache.DoubleParser parser, boolean setDocsWithField)
      throws IOException {
    return (double[]) caches.get(Double.TYPE).get(reader, new Entry(field, parser), setDocsWithField);
  }

  // Cache that builds a double[] with one slot per document (length reader.maxDoc()).
  static final class DoubleCache extends Cache {
    DoubleCache(FieldCacheImpl wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField)
        throws IOException {
      String field = entryKey.field;
      FieldCache.DoubleParser parser = (FieldCache.DoubleParser) entryKey.custom;
      if (parser == null) {
        // Default parser first; NumericUtils-based parser on NumberFormatException.
        try {
          return wrapper.getDoubles(reader, field, DEFAULT_DOUBLE_PARSER, setDocsWithField);
        } catch (NumberFormatException ne) {
          return wrapper.getDoubles(reader, field, NUMERIC_UTILS_DOUBLE_PARSER, setDocsWithField);
        }
      }
      final int maxDoc = reader.maxDoc();
      double[] retArray = null;

      Terms terms = reader.terms(field);
      FixedBitSet docsWithField = null;
      if (terms != null) {
        if (setDocsWithField) {
          final int termsDocCount = terms.getDocCount();
          assert termsDocCount <= maxDoc;
          if (termsDocCount == maxDoc) {
            // Fast case: all docs have this field:
            wrapper.setDocsWithField(reader, field, new Bits.MatchAllBits(maxDoc));
            setDocsWithField = false;
          }
        }
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;
        try {
          while(true) {
            final BytesRef term = termsEnum.next();
            if (term == null) {
              break;
            }
            final double termval = parser.parseDouble(term);
            if (retArray == null) {
              // late init so numeric fields don't double allocate
              retArray = new double[maxDoc];
            }

            docs = termsEnum.docs(null, docs, 0);
            while (true) {
              final int docID = docs.nextDoc();
              if (docID == DocIdSetIterator.NO_MORE_DOCS) {
                break;
              }
              retArray[docID] = termval;
              if (setDocsWithField) {
                if (docsWithField == null) {
                  // Lazy init
                  docsWithField = new FixedBitSet(maxDoc);
                }
                docsWithField.set(docID);
              }
            }
          }
        } catch (FieldCache.StopFillCacheException stop) {
          // Deliberately ignored: only ends the fill loop early.
        }
      }
      if (retArray == null) {
        // no values
        retArray = new double[maxDoc];
      }
      if (setDocsWithField) {
        wrapper.setDocsWithField(reader, field, docsWithField);
      }
      return retArray;
    }
  }

  // DocTermsIndex backed by three structures: the term bytes, a per-ord offset
  // into those bytes, and a per-document ord.
  public static class DocTermsIndexImpl extends DocTermsIndex {
    private final PagedBytes.Reader bytes;
    private final PackedInts.Reader termOrdToBytesOffset;
    private final PackedInts.Reader docToTermOrd;
    private final int numOrd;

    public DocTermsIndexImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
      this.bytes = bytes;
      this.docToTermOrd = docToTermOrd;
      this.termOrdToBytesOffset = termOrdToBytesOffset;
      this.numOrd = numOrd;
    }

    @Override
    public PackedInts.Reader getDocToOrd() {
      return docToTermOrd;
    }

    @Override
    public int numOrd() {
      return numOrd;
    }

    @Override
    public int getOrd(int docID) {
      return (int) docToTermOrd.get(docID);
    }

    @Override
    public int size() {
      return docToTermOrd.size();
    }

    // Fills ret with the term bytes stored at the offset recorded for ord.
    @Override
    public BytesRef lookup(int ord, BytesRef ret) {
      return bytes.fill(ret, termOrdToBytesOffset.get(ord));
    }

    @Override
    public TermsEnum getTermsEnum() {
      return this.new DocTermsIndexEnum();
    }

    // TermsEnum over the stored terms, addressed by ord. It reuses a single
    // BytesRef (term) that points into the PagedBytes blocks.
    class DocTermsIndexEnum extends TermsEnum {
      int currentOrd;
      int currentBlockNumber;
      int end;  // end position in the current block
      final byte[][] blocks;
      final int[] blockEnds;

      final BytesRef term = new BytesRef();

      // Starts positioned on ord 0.
      public DocTermsIndexEnum() {
        currentOrd = 0;
        currentBlockNumber = 0;
        blocks = bytes.getBlocks();
        blockEnds = bytes.getBlockEnds();
        currentBlockNumber = bytes.fillAndGetIndex(term, termOrdToBytesOffset.get(0));
        end = blockEnds[currentBlockNumber];
      }

      // Binary search over ords 1..numOrd-1 (ord 0 is not searched).
      @Override
      public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
        int low = 1;
        int high = numOrd-1;

        while (low <= high) {
          int mid = (low + high) >>> 1;
          seekExact(mid);
          int cmp = term.compareTo(text);

          if (cmp < 0)
            low = mid + 1;
          else if (cmp > 0)
            high = mid - 1;
          else
            return SeekStatus.FOUND; // key found
        }

        if (low == numOrd) {
          // Every stored term compared smaller than text.
          return SeekStatus.END;
        } else {
          // Position on the smallest term greater than text.
          seekExact(low);
          return SeekStatus.NOT_FOUND;
        }
      }

      // Positions the enum on the given ord by re-reading its bytes.
      public void seekExact(long ord) throws IOException {
        assert(ord >= 0 && ord <= numOrd);
        // TODO: if gap is small, could iterate from current position?  Or let user decide that?
        currentBlockNumber = bytes.fillAndGetIndex(term, termOrdToBytesOffset.get((int)ord));
        end = blockEnds[currentBlockNumber];
        currentOrd = (int)ord;
      }

      // Advances to the term stored right after the current one, decoding the
      // length prefix by hand; returns null once the blocks are exhausted.
      @Override
      public BytesRef next() throws IOException {
        int start = term.offset + term.length;
        if (start >= end) {
          // switch byte blocks
          if (currentBlockNumber +1 >= blocks.length) {
            return null;
          }
          currentBlockNumber++;
          term.bytes = blocks[currentBlockNumber];
          end = blockEnds[currentBlockNumber];
          start = 0;
          if (end<=0) return null;  // special case of empty last array
        }

        currentOrd++;

        byte[] block = term.bytes;
        if ((block[start] & 128) == 0) {
          // High bit clear: one-byte length prefix.
          term.length = block[start];
          term.offset = start+1;
        } else {
          // High bit set: two-byte prefix, the low 7 bits of the first byte
          // are the high bits of the length.
          term.length = (((block[start] & 0x7f)) << 8) | (block[1+start] & 0xff);
          term.offset = start+2;
        }

        return term;
      }

      @Override
      public BytesRef term() throws IOException {
        return term;
      }

      @Override
      public long ord() throws IOException {
        return currentOrd;
      }

      @Override
      public int docFreq() {
        throw new UnsupportedOperationException();
      }

      @Override
      public long totalTermFreq() {
        return -1;
      }

      @Override
      public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
        throw new UnsupportedOperationException();
      }

      @Override
      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
        throw new UnsupportedOperationException();
      }

      @Override
      public Comparator<BytesRef> getComparator() {
        return BytesRef.getUTF8SortedAsUnicodeComparator();
      }

      // Seeks using the ord carried by the given OrdTermState; the term argument is not used.
      @Override
      public void seekExact(BytesRef term, TermState state) throws IOException {
        assert state != null && state instanceof OrdTermState;
        this.seekExact(((OrdTermState)state).ord);
      }

      @Override
      public TermState termState() throws IOException {
        OrdTermState state = new OrdTermState();
        state.ord = currentOrd;
        return state;
      }
    }
  }

  // Convenience overload using PackedInts.FAST as the acceptable overhead ratio.
  public DocTermsIndex getTermsIndex(AtomicReader reader, String field) throws IOException {
    return getTermsIndex(reader, field, PackedInts.FAST);
  }

  // The overhead ratio is boxed and becomes part of the cache key (Entry.custom).
  public DocTermsIndex getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
    return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, acceptableOverheadRatio), false);
  }

  // Cache that builds a DocTermsIndexImpl for a field.
  static class DocTermsIndexCache extends Cache {
    DocTermsIndexCache(FieldCacheImpl wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField /* ignored */)
        throws IOException {

      Terms terms = reader.terms(entryKey.field);

      // getTermsIndex() stores the ratio as a Float in Entry.custom.
      final float acceptableOverheadRatio = ((Float) entryKey.custom).floatValue();

      final PagedBytes bytes = new PagedBytes(15);

      int startBytesBPV;
      int startTermsBPV;
      int startNumUniqueTerms;

      int maxDoc = reader.maxDoc();
      // Upper bound on the ords handed out: maxDoc+1, guarded against int overflow.
      final int termCountHardLimit;
      if (maxDoc == Integer.MAX_VALUE) {
        termCountHardLimit = Integer.MAX_VALUE;
      } else {
        termCountHardLimit = maxDoc+1;
      }

      if (terms != null) {
        // Try for coarse estimate for number of bits; this
        // should be an underestimate most of the time, which
        // is fine -- GrowableWriter will reallocate as needed
        long numUniqueTerms = terms.size();
        if (numUniqueTerms != -1L) {
          if (numUniqueTerms > termCountHardLimit) {
            // app is misusing the API (there is more than
            // one term per doc); in this case we make best
            // effort to load what we can (see LUCENE-2142)
            numUniqueTerms = termCountHardLimit;
          }

          startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
          startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);

          startNumUniqueTerms = (int) numUniqueTerms;
        } else {
          startBytesBPV = 1;
          startTermsBPV = 1;
          startNumUniqueTerms = 1;
        }
      } else {
        startBytesBPV = 1;
        startTermsBPV = 1;
        startNumUniqueTerms = 1;
      }

      GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, acceptableOverheadRatio);
      final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);

      // 0 is reserved for "unset"
      bytes.copyUsingLengthPrefix(new BytesRef());
      int termOrd = 1;

      if (terms != null) {
        final TermsEnum termsEnum =
terms.iterator(null);
        DocsEnum docs = null;

        // Assign ords 1, 2, 3, ... to the terms in enumeration order and record,
        // for every document of each term, the ord of that term.
        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          if (termOrd >= termCountHardLimit) {
            break;
          }

          if (termOrd == termOrdToBytesOffset.size()) {
            // NOTE: this code only runs if the incoming
            // reader impl doesn't implement
            // size (which should be uncommon)
            termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
          }
          termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
          docs = termsEnum.docs(null, docs, 0);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
            docToTermOrd.set(docID, termOrd);
          }
          termOrd++;
        }

        // Trim the ord -> offset table down to the ords actually used.
        if (termOrdToBytesOffset.size() > termOrd) {
          termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd);
        }
      }

      // maybe an int-only impl?
      return new DocTermsIndexImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd);
    }
  }

  /**
   * DocTerms backed by the term bytes plus a per-document offset into them.
   * Offset 0 is the reserved "not set" slot: the code that fills these
   * structures writes an empty BytesRef at offset 0 before any real term, and
   * documents without a term keep the default offset of 0.
   */
  private static class DocTermsImpl extends DocTerms {
    private final PagedBytes.Reader bytes;
    private final PackedInts.Reader docToOffset;

    public DocTermsImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) {
      this.bytes = bytes;
      this.docToOffset = docToOffset;
    }

    /** Returns the number of per-document slots. */
    @Override
    public int size() {
      return docToOffset.size();
    }

    /**
     * Returns true if a term was recorded for the document.
     * Fixed: the previous check was {@code == 0}, which reported exactly the
     * opposite, because offset 0 means "not set".
     */
    @Override
    public boolean exists(int docID) {
      return docToOffset.get(docID) != 0;
    }

    /**
     * Fills ret with the bytes at the document's offset and returns the result
     * of the fill. For a document without a term this reads the reserved empty
     * entry at offset 0.
     */
    @Override
    public BytesRef getTerm(int docID, BytesRef ret) {
      final int pointer = (int) docToOffset.get(docID);
      return bytes.fill(ret, pointer);
    }
  }

  // TODO: this if DocTermsIndex was already created, we
  // should share it...
public DocTerms getTerms(AtomicReader reader, String field) throws IOException {
    // Convenience overload: use PackedInts.FAST as the acceptable overhead ratio.
    return getTerms(reader, field, PackedInts.FAST);
  }

  /**
   * Looks up (building on first use) the DocTerms for the field. The boxed
   * overhead ratio is part of the cache key.
   */
  public DocTerms getTerms(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
    final Cache docTermsCache = caches.get(DocTerms.class);
    final Entry cacheKey = new Entry(field, Float.valueOf(acceptableOverheadRatio));
    return (DocTerms) docTermsCache.get(reader, cacheKey, false);
  }

  /** Cache that builds a DocTermsImpl (term bytes plus per-document offset) for a field. */
  static final class DocTermsCache extends Cache {
    DocTermsCache(FieldCacheImpl wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField /* ignored */)
        throws IOException {

      final Terms fieldTerms = reader.terms(entryKey.field);
      final float overheadRatio = ((Float) entryKey.custom).floatValue();

      // At most one term per document is loaded.
      final int maxTerms = reader.maxDoc();

      // Holds the actual term data, expanded.
      final PagedBytes termBytes = new PagedBytes(15);

      // Coarse starting estimate for the bits per value; this should be an
      // underestimate most of the time, which is fine -- GrowableWriter will
      // reallocate as needed.
      int initialBitsPerValue = 1;
      if (fieldTerms != null) {
        final long reportedTermCount = fieldTerms.size();
        if (reportedTermCount != -1L) {
          final long cappedTermCount = Math.min(reportedTermCount, (long) maxTerms);
          initialBitsPerValue = PackedInts.bitsRequired(cappedTermCount*4);
        }
      }

      final GrowableWriter docToOffset =
          new GrowableWriter(initialBitsPerValue, reader.maxDoc(), overheadRatio);

      // pointer==0 means not set
      termBytes.copyUsingLengthPrefix(new BytesRef());

      if (fieldTerms != null) {
        final TermsEnum termsEnum = fieldTerms.iterator(null);
        DocsEnum postings = null;
        // Stopping after maxTerms terms covers apps that misuse the API (more
        // than one term per doc); in that case we make best effort to load
        // what we can (see LUCENE-2142).
        for (int loaded = 0; loaded < maxTerms; loaded++) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          final long pointer = termBytes.copyUsingLengthPrefix(term);
          postings = termsEnum.docs(null, postings, 0);
          int docID;
          while ((docID = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            docToOffset.set(docID, pointer);
          }
        }
      }

      // maybe an int-only impl?
      return new DocTermsImpl(termBytes.freeze(true), docToOffset.getMutable());
    }
  }

  /** Looks up (building on first use) the DocTermOrds for the field. */
  public DocTermOrds getDocTermOrds(AtomicReader reader, String field) throws IOException {
    final Cache ordsCache = caches.get(DocTermOrds.class);
    final Entry cacheKey = new Entry(field, null);
    return (DocTermOrds) ordsCache.get(reader, cacheKey, false);
  }

  /** Cache whose values are DocTermOrds instances built straight from the reader. */
  static final class DocTermOrdsCache extends Cache {
    DocTermOrdsCache(FieldCacheImpl wrapper) {
      super(wrapper);
    }

    @Override
    protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField /* ignored */)
        throws IOException {
      final String fieldName = entryKey.field;
      return new DocTermOrds(reader, fieldName);
    }
  }

  // Stream that Cache.get() passes to printNewInsanity; nothing is printed while it is null.
  private volatile PrintStream infoStream;

  public void setInfoStream(PrintStream stream) {
    this.infoStream = stream;
  }

  public PrintStream getInfoStream() {
    return this.infoStream;
  }
}