package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.BytesRef;

/**
 * @lucene.experimental
 */
public class SegmentReader extends IndexReader implements Cloneable {
  protected boolean readOnly;

  private SegmentInfo si;
  private int readBufferSize;

  CloseableThreadLocal<FieldsReader> fieldsReaderLocal = new FieldsReaderLocal();
  CloseableThreadLocal<TermVectorsReader> termVectorsLocal = new CloseableThreadLocal<TermVectorsReader>();

  volatile BitVector deletedDocs;
  AtomicInteger deletedDocsRef = null;
  private boolean deletedDocsDirty = false;
  private boolean normsDirty = false;
  private int pendingDeleteCount;

  private boolean rollbackHasChanges = false;
  private boolean rollbackDeletedDocsDirty = false;
  private boolean rollbackNormsDirty = false;
  private SegmentInfo rollbackSegmentInfo;
  private int rollbackPendingDeleteCount;

  // optionally used for the .nrm file shared by multiple norms
  private IndexInput singleNormStream;
  private AtomicInteger singleNormRef;

  CoreReaders core;

  // Holds core readers that are shared (unchanged) when
  // SegmentReader is cloned or reopened
  static final class CoreReaders {
    // Counts how many other readers share the core objects
    // (freqStream, proxStream, tis, etc.) of this reader;
    // when coreRef drops to 0, these core objects may be
    // closed.  A given instance of SegmentReader may be
    // closed, even though it shares core objects with other
    // SegmentReaders:
    private final AtomicInteger ref = new AtomicInteger(1);

    final String segment;
    final FieldInfos fieldInfos;
    final FieldsProducer fields;
    final CodecProvider codecs;

    final Directory dir;
    final Directory cfsDir;
    final int readBufferSize;
    final int termsIndexDivisor;

    private final SegmentReader origInstance;

    FieldsReader fieldsReaderOrig;
    TermVectorsReader termVectorsReaderOrig;
    CompoundFileReader cfsReader;
    CompoundFileReader storeCFSReader;

    CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si,
                int readBufferSize, int termsIndexDivisor, CodecProvider codecs) throws IOException {

      if (termsIndexDivisor == 0) {
        throw new IllegalArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
      }

      segment = si.name;
      if (codecs == null) {
        codecs = CodecProvider.getDefault();
      }
      this.codecs = codecs;
      this.readBufferSize = readBufferSize;
      this.dir = dir;

      boolean success = false;

      try {
        Directory dir0 = dir;
        if (si.getUseCompoundFile()) {
          cfsReader = new CompoundFileReader(dir, IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION), readBufferSize);
          dir0 = cfsReader;
        }
        cfsDir = dir0;

        fieldInfos = new FieldInfos(cfsDir, IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELD_INFOS_EXTENSION));

        this.termsIndexDivisor = termsIndexDivisor;

        // Ask codec for its Fields
        fields = si.getCodec().fieldsProducer(new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor));
        assert fields != null;

        success = true;
      } finally {
        if (!success) {
          decRef();
        }
      }

      // Must assign this at the end -- if we hit an
      // exception above, we don't want to attempt to
      // purge the FieldCache (will hit NPE because core is
      // not assigned yet).
      this.origInstance = origInstance;
    }

    synchronized TermVectorsReader getTermVectorsReaderOrig() {
      return termVectorsReaderOrig;
    }

    synchronized FieldsReader getFieldsReaderOrig() {
      return fieldsReaderOrig;
    }

    synchronized void incRef() {
      ref.incrementAndGet();
    }

    synchronized Directory getCFSReader() {
      return cfsReader;
    }

    synchronized void decRef() throws IOException {

      if (ref.decrementAndGet() == 0) {

        if (fields != null) {
          fields.close();
        }

        if (termVectorsReaderOrig != null) {
          termVectorsReaderOrig.close();
        }

        if (fieldsReaderOrig != null) {
          fieldsReaderOrig.close();
        }

        if (cfsReader != null) {
          cfsReader.close();
        }

        if (storeCFSReader != null) {
          storeCFSReader.close();
        }
        // Force FieldCache to evict our entries at this
        // point.  If the exception occurred while
        // initializing the core readers, then
        // origInstance will be null, and we don't want
        // to call FieldCache.purge (it leads to NPE):
        if (origInstance != null) {
          FieldCache.DEFAULT.purge(origInstance);
        }
      }
    }

    synchronized void openDocStores(SegmentInfo si) throws IOException {

      assert si.name.equals(segment);

      if (fieldsReaderOrig == null) {
        final Directory storeDir;
        if (si.getDocStoreOffset() != -1) {
          if (si.getDocStoreIsCompoundFile()) {
            assert storeCFSReader == null;
            storeCFSReader = new CompoundFileReader(dir,
                IndexFileNames.segmentFileName(si.getDocStoreSegment(), "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION),
                readBufferSize);
            storeDir = storeCFSReader;
            assert storeDir != null;
          } else {
            storeDir = dir;
            assert storeDir != null;
          }
        } else if (si.getUseCompoundFile()) {
          // In some cases, we were originally opened when CFS
          // was not used, but then we are asked to open doc
          // stores after the segment has switched to CFS
          if (cfsReader == null) {
            cfsReader = new CompoundFileReader(dir, IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION), readBufferSize);
          }
          storeDir = cfsReader;
          assert storeDir != null;
        } else {
          storeDir = dir;
          assert storeDir != null;
        }

        final String storesSegment;
        if (si.getDocStoreOffset() != -1) {
          storesSegment = si.getDocStoreSegment();
        } else {
          storesSegment = segment;
        }

        fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize,
                                            si.getDocStoreOffset(), si.docCount);

        // Verify two sources of "maxDoc" agree:
        if (si.getDocStoreOffset() == -1 && fieldsReaderOrig.size() != si.docCount) {
          throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.size() + " but segmentInfo shows " + si.docCount);
        }

        if (fieldInfos.hasVectors()) { // open term vector files only as needed
          termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.getDocStoreOffset(), si.docCount);
        }
      }
    }
  }

  /**
   * Sets the initial value
   */
  private class FieldsReaderLocal extends CloseableThreadLocal<FieldsReader> {
    @Override
    protected FieldsReader initialValue() {
      return (FieldsReader) core.getFieldsReaderOrig().clone();
    }
  }
  /**
   * Byte[] referencing is used because a new norm object needs
   * to be created for each clone, and the byte array is all
   * that is needed for sharing between cloned readers.  The
   * current norm referencing is for sharing between readers
   * whereas the byte[] referencing is for copy on write which
   * is independent of reader references (i.e. incRef, decRef).
   */
  final class Norm implements Cloneable {
    private int refCount = 1;

    // If this instance is a clone, the originalNorm
    // references the Norm that has a real open IndexInput:
    private Norm origNorm;

    private IndexInput in;
    private long normSeek;

    // null until bytes is set
    private AtomicInteger bytesRef;
    private byte[] bytes;
    private boolean dirty;
    private int number;
    private boolean rollbackDirty;

    public Norm(IndexInput in, int number, long normSeek) {
      this.in = in;
      this.number = number;
      this.normSeek = normSeek;
    }

    public synchronized void incRef() {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
      refCount++;
    }

    private void closeInput() throws IOException {
      if (in != null) {
        if (in != singleNormStream) {
          // It's private to us -- just close it
          in.close();
        } else {
          // We are sharing this with others -- decRef and
          // maybe close the shared norm stream
          if (singleNormRef.decrementAndGet() == 0) {
            singleNormStream.close();
            singleNormStream = null;
          }
        }

        in = null;
      }
    }

    public synchronized void decRef() throws IOException {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);

      if (--refCount == 0) {
        if (origNorm != null) {
          origNorm.decRef();
          origNorm = null;
        } else {
          closeInput();
        }

        if (bytes != null) {
          assert bytesRef != null;
          bytesRef.decrementAndGet();
          bytes = null;
          bytesRef = null;
        } else {
          assert bytesRef == null;
        }
      }
    }

    // Load bytes but do not cache them if they were not
    // already cached
    public synchronized void bytes(byte[] bytesOut, int offset, int len) throws IOException {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
      if (bytes != null) {
        // Already cached -- copy from cache:
        assert len <= maxDoc();
        System.arraycopy(bytes, 0, bytesOut, offset, len);
      } else {
        // Not cached
        if (origNorm != null) {
          // Ask origNorm to load
          origNorm.bytes(bytesOut, offset, len);
        } else {
          // We are orig -- read ourselves from disk:
          synchronized(in) {
            in.seek(normSeek);
            in.readBytes(bytesOut, offset, len, false);
          }
        }
      }
    }

    // Load & cache full bytes array.  Returns bytes.
    public synchronized byte[] bytes() throws IOException {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
      if (bytes == null) {                     // value not yet read
        assert bytesRef == null;
        if (origNorm != null) {
          // Ask origNorm to load so that for a series of
          // reopened readers we share a single read-only
          // byte[]
          bytes = origNorm.bytes();
          bytesRef = origNorm.bytesRef;
          bytesRef.incrementAndGet();

          // Once we've loaded the bytes we no longer need
          // origNorm:
          origNorm.decRef();
          origNorm = null;
        } else {
          // We are the origNorm, so load the bytes for real
          // ourselves:
          final int count = maxDoc();
          bytes = new byte[count];

          // Since we are orig, in must not be null
          assert in != null;

          // Read from disk.
          synchronized(in) {
            in.seek(normSeek);
            in.readBytes(bytes, 0, count, false);
          }

          bytesRef = new AtomicInteger(1);
          closeInput();
        }
      }

      return bytes;
    }

    // Only for testing
    AtomicInteger bytesRef() {
      return bytesRef;
    }

    // Called if we intend to change a norm value.  We make a
    // private copy of bytes if it's shared with others:
    public synchronized byte[] copyOnWrite() throws IOException {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
      bytes();
      assert bytes != null;
      assert bytesRef != null;
      if (bytesRef.get() > 1) {
        // I cannot be the origNorm for another norm
        // instance if I'm being changed.  I.e., only the
        // "head Norm" can be changed:
        assert refCount == 1;
        final AtomicInteger oldRef = bytesRef;
        bytes = cloneNormBytes(bytes);
        bytesRef = new AtomicInteger(1);
        oldRef.decrementAndGet();
      }
      dirty = true;
      return bytes;
    }

    // Returns a copy of this Norm instance that shares
    // IndexInput & bytes with the original one
    @Override
    public synchronized Object clone() {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);

      Norm clone;
      try {
        clone = (Norm) super.clone();
      } catch (CloneNotSupportedException cnse) {
        // Cannot happen
        throw new RuntimeException("unexpected CloneNotSupportedException", cnse);
      }
      clone.refCount = 1;

      if (bytes != null) {
        assert bytesRef != null;
        assert origNorm == null;

        // Clone holds a reference to my bytes:
        clone.bytesRef.incrementAndGet();
      } else {
        assert bytesRef == null;
        if (origNorm == null) {
          // I become the origNorm for the clone:
          clone.origNorm = this;
        }
        clone.origNorm.incRef();
      }

      // Only the origNorm will actually readBytes from in:
      clone.in = null;

      return clone;
    }

    // Flush all pending changes to the next generation
    // separate norms file.
    public void reWrite(SegmentInfo si) throws IOException {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0): "refCount=" + refCount + " origNorm=" + origNorm;

      // NOTE: norms are re-written in regular directory, not cfs
      si.advanceNormGen(this.number);
      final String normFileName = si.getNormFileName(this.number);
      IndexOutput out = directory().createOutput(normFileName);
      boolean success = false;
      try {
        try {
          out.writeBytes(bytes, maxDoc());
        } finally {
          out.close();
        }
        success = true;
      } finally {
        if (!success) {
          try {
            directory().deleteFile(normFileName);
          } catch (Throwable t) {
            // suppress this so we keep throwing the
            // original exception
          }
        }
      }
      this.dirty = false;
    }
  }

  Map<String,Norm> norms = new HashMap<String,Norm>();

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(boolean readOnly, SegmentInfo si, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
    return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor, null);
  }

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(boolean readOnly,
                                  Directory dir,
                                  SegmentInfo si,
                                  int readBufferSize,
                                  boolean doOpenStores,
                                  int termInfosIndexDivisor,
                                  CodecProvider codecs)
    throws CorruptIndexException, IOException {

    if (codecs == null) {
      codecs = CodecProvider.getDefault();
    }

    SegmentReader instance = new SegmentReader();
    instance.readOnly = readOnly;
    instance.si = si;
    instance.readBufferSize = readBufferSize;

    boolean success = false;

    try {
      instance.core = new CoreReaders(instance, dir, si, readBufferSize, termInfosIndexDivisor, codecs);
      if (doOpenStores) {
        instance.core.openDocStores(si);
      }
      instance.loadDeletedDocs();
      instance.openNorms(instance.core.cfsDir, readBufferSize);
      success = true;
    } finally {

      // With lock-less commits, it's entirely possible (and
      // fine) to hit a FileNotFound exception above.  In
      // this case, we want to explicitly close any subset
      // of things that were opened so that we don't have to
      // wait for a GC to do so.
      if (!success) {
        instance.doClose();
      }
    }
    return instance;
  }

  void openDocStores() throws IOException {
    core.openDocStores(si);
  }

  @Override
  public Bits getDeletedDocs() {
    return deletedDocs;
  }

  private boolean checkDeletedCounts() throws IOException {
    final int recomputedCount = deletedDocs.getRecomputedCount();

    assert deletedDocs.count() == recomputedCount :
      "deleted count=" + deletedDocs.count() + " vs recomputed count=" + recomputedCount;

    assert si.getDelCount() == recomputedCount :
      "delete count mismatch: info=" + si.getDelCount() + " vs BitVector=" + recomputedCount;

    // Verify # deletes does not exceed maxDoc for this
    // segment:
    assert si.getDelCount() <= maxDoc() :
      "delete count mismatch: (" + recomputedCount + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name;

    return true;
  }

  private void loadDeletedDocs() throws IOException {
    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (hasDeletions(si)) {
      deletedDocs = new BitVector(directory(), si.getDelFileName());
      deletedDocsRef = new AtomicInteger(1);

      assert checkDeletedCounts();
      if (deletedDocs.size() != si.docCount) {
        throw new CorruptIndexException("document count mismatch: deleted docs count " + deletedDocs.size() + " vs segment doc count " + si.docCount + " segment=" + si.name);
      }
    } else
      assert si.getDelCount() == 0;
  }

  /**
   * Clones the norm bytes.  May be overridden by subclasses.  New and experimental.
   * @param bytes Byte array to clone
   * @return New byte array
   */
  protected byte[] cloneNormBytes(byte[] bytes) {
    byte[] cloneBytes = new byte[bytes.length];
    System.arraycopy(bytes, 0, cloneBytes, 0, bytes.length);
    return cloneBytes;
  }

  /**
   * Clones the deleteDocs BitVector.  May be overridden by subclasses.  New and experimental.
   * @param bv BitVector to clone
   * @return New BitVector
   */
  protected BitVector cloneDeletedDocs(BitVector bv) {
    return (BitVector) bv.clone();
  }

  @Override
  public final synchronized Object clone() {
    try {
      return clone(readOnly); // Preserve current readOnly
    } catch (Exception ex) {
      throw new RuntimeException(ex);
    }
  }

  @Override
  public final synchronized IndexReader clone(boolean openReadOnly) throws CorruptIndexException, IOException {
    return reopenSegment(si, true, openReadOnly);
  }

  @Override
  public synchronized IndexReader reopen() throws CorruptIndexException, IOException {
    return reopenSegment(si, false, readOnly);
  }

  @Override
  public synchronized IndexReader reopen(boolean openReadOnly) throws CorruptIndexException, IOException {
    return reopenSegment(si, false, openReadOnly);
  }

  synchronized SegmentReader reopenSegment(SegmentInfo si, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException {
    boolean deletionsUpToDate = (this.si.hasDeletions() == si.hasDeletions())
                                  && (!si.hasDeletions() || this.si.getDelFileName().equals(si.getDelFileName()));
    boolean normsUpToDate = true;

    boolean[] fieldNormsChanged = new boolean[core.fieldInfos.size()];
    final int fieldCount = core.fieldInfos.size();
    for (int i = 0; i < fieldCount; i++) {
      if (!this.si.getNormFileName(i).equals(si.getNormFileName(i))) {
        normsUpToDate = false;
        fieldNormsChanged[i] = true;
      }
    }

    // if we're cloning we need to run through the reopenSegment logic
    // also if both old and new readers aren't readonly, we clone to avoid sharing modifications
    if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly) {
      return this;
    }

    // When cloning, the incoming SegmentInfos should not
    // have any changes in it:
    assert !doClone || (normsUpToDate && deletionsUpToDate);

    // clone reader
    SegmentReader clone = new SegmentReader();

    boolean success = false;
    try {
      core.incRef();
      clone.core = core;
      clone.readOnly = openReadOnly;
      clone.si = si;
      clone.readBufferSize = readBufferSize;
      clone.pendingDeleteCount = pendingDeleteCount;

      if (!openReadOnly && hasChanges) {
        // My pending changes transfer to the new reader
        clone.deletedDocsDirty = deletedDocsDirty;
        clone.normsDirty = normsDirty;
        clone.hasChanges = hasChanges;
        hasChanges = false;
      }

      if (doClone) {
        if (deletedDocs != null) {
          deletedDocsRef.incrementAndGet();
          clone.deletedDocs = deletedDocs;
          clone.deletedDocsRef = deletedDocsRef;
        }
      } else {
        if (!deletionsUpToDate) {
          // load deleted docs
          assert clone.deletedDocs == null;
          clone.loadDeletedDocs();
        } else if (deletedDocs != null) {
          deletedDocsRef.incrementAndGet();
          clone.deletedDocs = deletedDocs;
          clone.deletedDocsRef = deletedDocsRef;
        }
      }

      clone.norms = new HashMap<String,Norm>();

      // Clone norms
      for (int i = 0; i < fieldNormsChanged.length; i++) {

        // Clone unchanged norms to the cloned reader
        if (doClone || !fieldNormsChanged[i]) {
          final String curField = core.fieldInfos.fieldInfo(i).name;
          Norm norm = this.norms.get(curField);
          if (norm != null)
            clone.norms.put(curField, (Norm) norm.clone());
        }
      }

      // If we are not cloning, then this will open anew
      // any norms that have changed:
      clone.openNorms(si.getUseCompoundFile() ? core.getCFSReader() : directory(), readBufferSize);

      success = true;
    } finally {
      if (!success) {
        // An exception occurred during reopen, we have to decRef the norms
        // that we incRef'ed already and close singleNormStream and FieldsReader
        clone.decRef();
      }
    }

    return clone;
  }

  @Override
  protected void doCommit(Map<String,String> commitUserData) throws IOException {
    if (hasChanges) {
      startCommit();
      boolean success = false;
      try {
        commitChanges(commitUserData);
        success = true;
      } finally {
        if (!success) {
          rollbackCommit();
        }
      }
    }
  }

  private void commitChanges(Map<String,String> commitUserData) throws IOException {
    if (deletedDocsDirty) {               // re-write deleted
      si.advanceDelGen();

      // We can write directly to the actual name (vs to a
      // .tmp & renaming it) because the file is not live
      // until segments file is written:
      final String delFileName = si.getDelFileName();
      boolean success = false;
      try {
        deletedDocs.write(directory(), delFileName);
        success = true;
      } finally {
        if (!success) {
          try {
            directory().deleteFile(delFileName);
          } catch (Throwable t) {
            // suppress this so we keep throwing the
            // original exception
          }
        }
      }

      si.setDelCount(si.getDelCount()+pendingDeleteCount);
      pendingDeleteCount = 0;
      assert deletedDocs.count() == si.getDelCount(): "delete count mismatch during commit: info=" + si.getDelCount() + " vs BitVector=" + deletedDocs.count();
    } else {
      assert pendingDeleteCount == 0;
    }

    if (normsDirty) {               // re-write norms
      si.initNormGen(core.fieldInfos.size());
      for (final Norm norm : norms.values()) {
        if (norm.dirty) {
          norm.reWrite(si);
        }
      }
    }
    deletedDocsDirty = false;
    normsDirty = false;
    hasChanges = false;
  }

  FieldsReader getFieldsReader() {
    return fieldsReaderLocal.get();
  }

  @Override
  protected void doClose() throws IOException {
    termVectorsLocal.close();
    fieldsReaderLocal.close();

    if (deletedDocs != null) {
      deletedDocsRef.decrementAndGet();
      // null so if an app hangs on to us we still free most ram
      deletedDocs = null;
    }

    for (final Norm norm : norms.values()) {
      norm.decRef();
    }
    if (core != null) {
      core.decRef();
    }
  }

  static boolean hasDeletions(SegmentInfo si) throws IOException {
    // Don't call ensureOpen() here (it could affect performance)
    return si.hasDeletions();
  }
  @Override
  public boolean hasDeletions() {
    // Don't call ensureOpen() here (it could affect performance)
    return deletedDocs != null;
  }

  static boolean usesCompoundFile(SegmentInfo si) throws IOException {
    return si.getUseCompoundFile();
  }

  static boolean hasSeparateNorms(SegmentInfo si) throws IOException {
    return si.hasSeparateNorms();
  }

  @Override
  protected void doDelete(int docNum) {
    if (deletedDocs == null) {
      deletedDocs = new BitVector(maxDoc());
      deletedDocsRef = new AtomicInteger(1);
    }
    // there is more than 1 SegmentReader with a reference to this
    // deletedDocs BitVector so decRef the current deletedDocsRef,
    // clone the BitVector, create a new deletedDocsRef
    if (deletedDocsRef.get() > 1) {
      AtomicInteger oldRef = deletedDocsRef;
      deletedDocs = cloneDeletedDocs(deletedDocs);
      deletedDocsRef = new AtomicInteger(1);
      oldRef.decrementAndGet();
    }
    deletedDocsDirty = true;
    if (!deletedDocs.getAndSet(docNum))
      pendingDeleteCount++;
  }

  @Override
  protected void doUndeleteAll() {
    deletedDocsDirty = false;
    if (deletedDocs != null) {
      assert deletedDocsRef != null;
      deletedDocsRef.decrementAndGet();
      deletedDocs = null;
      deletedDocsRef = null;
      pendingDeleteCount = 0;
      si.clearDelGen();
      si.setDelCount(0);
    } else {
      assert deletedDocsRef == null;
      assert pendingDeleteCount == 0;
    }
  }

  List<String> files() throws IOException {
    return new ArrayList<String>(si.files());
  }

  FieldInfos fieldInfos() {
    return core.fieldInfos;
  }

  @Override
  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
    ensureOpen();
    return getFieldsReader().doc(n, fieldSelector);
  }

  @Override
  public Fields fields() throws IOException {
    return core.fields;
  }

  @Override
  public int docFreq(String field, BytesRef term) throws IOException {
    ensureOpen();
    Terms terms = core.fields.terms(field);
    if (terms != null) {
      return terms.docFreq(term);
    } else {
      return 0;
    }
  }

  @Override
  public int numDocs() {
    // Don't call ensureOpen() here (it could affect performance)
    int n = maxDoc();
    if (deletedDocs != null)
      n -= deletedDocs.count();
    return n;
  }

  @Override
  public int maxDoc() {
    // Don't call ensureOpen() here (it could affect performance)
    return si.docCount;
  }

  /**
   * @see IndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)
   */
  @Override
  public Collection<String> getFieldNames(IndexReader.FieldOption fieldOption) {
    ensureOpen();

    Set<String> fieldSet = new HashSet<String>();
    for (int i = 0; i < core.fieldInfos.size(); i++) {
      FieldInfo fi = core.fieldInfos.fieldInfo(i);
      if (fieldOption == IndexReader.FieldOption.ALL) {
        fieldSet.add(fi.name);
      } else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
        fieldSet.add(fi.name);
      } else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) {
        fieldSet.add(fi.name);
      } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
        fieldSet.add(fi.name);
      } else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) {
        fieldSet.add(fi.name);
      } else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
        fieldSet.add(fi.name);
      } else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false &&
                 fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR) {
        fieldSet.add(fi.name);
      } else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
        fieldSet.add(fi.name);
      } else if (fi.storePositionWithTermVector &&
                 fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
        fieldSet.add(fi.name);
      } else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false &&
                 fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
        fieldSet.add(fi.name);
      } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) &&
                 fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
        fieldSet.add(fi.name);
      }
    }
    return fieldSet;
  }

  @Override
  public synchronized boolean hasNorms(String field) {
    ensureOpen();
    return norms.containsKey(field);
  }

  // can return null if norms aren't stored
  protected synchronized byte[] getNorms(String field) throws IOException {
    Norm norm = norms.get(field);
    if (norm == null) return null;  // not indexed, or norms not stored
    return norm.bytes();
  }

  // returns fake norms if norms aren't available
  @Override
  public synchronized byte[] norms(String field) throws IOException {
    ensureOpen();
    byte[] bytes = getNorms(field);
    return bytes;
  }

  @Override
  protected void doSetNorm(int doc, String field, byte value) throws IOException {
    Norm norm = norms.get(field);
    if (norm == null)                             // not an indexed field
      return;

    normsDirty = true;
    norm.copyOnWrite()[doc] = value;              // set the value
  }

  /** Read norms into a pre-allocated array. */
  @Override
  public synchronized void norms(String field, byte[] bytes, int offset) throws IOException {

    ensureOpen();
    Norm norm = norms.get(field);
    if (norm == null) {
      Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f));
      return;
    }

    norm.bytes(bytes, offset, maxDoc());
  }

  private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
    long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
    int maxDoc = maxDoc();
    for (int i = 0; i < core.fieldInfos.size(); i++) {
      FieldInfo fi = core.fieldInfos.fieldInfo(i);
      if (norms.containsKey(fi.name)) {
        // in case this SegmentReader is being re-opened, we might be able to
        // reuse some norm instances and skip loading them here
        continue;
      }
      if (fi.isIndexed && !fi.omitNorms) {
        Directory d = directory();
        String fileName = si.getNormFileName(fi.number);
        if (!si.hasSeparateNorms(fi.number)) {
          d = cfsDir;
        }

        // singleNormFile means multiple norms share this file
        boolean singleNormFile = IndexFileNames.matchesExtension(fileName, IndexFileNames.NORMS_EXTENSION);
        IndexInput normInput = null;
        long normSeek;

        if (singleNormFile) {
          normSeek = nextNormSeek;
          if (singleNormStream == null) {
            singleNormStream = d.openInput(fileName, readBufferSize);
            singleNormRef = new AtomicInteger(1);
          } else {
            singleNormRef.incrementAndGet();
          }
          // All norms in the .nrm file can share a single IndexInput since
          // they are only used in a synchronized context.
          // If this were to change in the future, a clone could be done here.
          normInput = singleNormStream;
        } else {
          normSeek = 0;
          normInput = d.openInput(fileName);
        }

        norms.put(fi.name, new Norm(normInput, fi.number, normSeek));
        nextNormSeek += maxDoc; // increment also if some norms are separate
      }
    }
  }
  // NOTE: only called from IndexWriter when a near
  // real-time reader is opened, or applyDeletes is run,
  // sharing a segment that's still being merged.  This
  // method is not thread safe, and relies on the
  // synchronization in IndexWriter
  void loadTermsIndex(int indexDivisor) throws IOException {
    core.fields.loadTermsIndex(indexDivisor);
  }

  // for testing only
  boolean normsClosed() {
    if (singleNormStream != null) {
      return false;
    }
    for (final Norm norm : norms.values()) {
      if (norm.refCount > 0) {
        return false;
      }
    }
    return true;
  }

  // for testing only
  boolean normsClosed(String field) {
    return norms.get(field).refCount == 0;
  }

  /**
   * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
   * @return TermVectorsReader
   */
  TermVectorsReader getTermVectorsReader() {
    TermVectorsReader tvReader = termVectorsLocal.get();
    if (tvReader == null) {
      TermVectorsReader orig = core.getTermVectorsReaderOrig();
      if (orig == null) {
        return null;
      } else {
        try {
          tvReader = (TermVectorsReader) orig.clone();
        } catch (CloneNotSupportedException cnse) {
          return null;
        }
      }
      termVectorsLocal.set(tvReader);
    }
    return tvReader;
  }

  TermVectorsReader getTermVectorsReaderOrig() {
    return core.getTermVectorsReaderOrig();
  }

  /** Return a term frequency vector for the specified document and field. The
   *  vector returned contains term numbers and frequencies for all terms in
   *  the specified field of this document, if the field had storeTermVector
   *  flag set.  If the flag was not set, the method returns null.
   * @throws IOException
   */
  @Override
  public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
    // Check if this field is invalid or has no stored term vector
    ensureOpen();
    FieldInfo fi = core.fieldInfos.fieldInfo(field);
    if (fi == null || !fi.storeTermVector)
      return null;

    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return null;

    return termVectorsReader.get(docNumber, field);
  }

  @Override
  public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
    ensureOpen();
    FieldInfo fi = core.fieldInfos.fieldInfo(field);
    if (fi == null || !fi.storeTermVector)
      return;

    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null) {
      return;
    }
    termVectorsReader.get(docNumber, field, mapper);
  }

  @Override
  public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
    ensureOpen();

    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return;

    termVectorsReader.get(docNumber, mapper);
  }

  /** Return an array of term frequency vectors for the specified document.
   *  The array contains a vector for each vectorized field in the document.
   *  Each vector contains term numbers and frequencies for all terms
   *  in a given vectorized field.
   *  If no such fields existed, the method returns null.
   * @throws IOException
   */
  @Override
  public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
    ensureOpen();

    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return null;

    return termVectorsReader.get(docNumber);
  }

  /** {@inheritDoc} */
  @Override
  public String toString() {
    final StringBuilder buffer = new StringBuilder();
    if (hasChanges) {
      buffer.append('*');
    }
    buffer.append(si.toString(core.dir, pendingDeleteCount));
    return buffer.toString();
  }

  /**
   * Return the name of the segment this reader is reading.
   */
  public String getSegmentName() {
    return core.segment;
  }
  /**
   * Return the SegmentInfo of the segment this reader is reading.
   */
  SegmentInfo getSegmentInfo() {
    return si;
  }

  void setSegmentInfo(SegmentInfo info) {
    si = info;
  }

  void startCommit() {
    rollbackSegmentInfo = (SegmentInfo) si.clone();
    rollbackHasChanges = hasChanges;
    rollbackDeletedDocsDirty = deletedDocsDirty;
    rollbackNormsDirty = normsDirty;
    rollbackPendingDeleteCount = pendingDeleteCount;
    for (Norm norm : norms.values()) {
      norm.rollbackDirty = norm.dirty;
    }
  }

  void rollbackCommit() {
    si.reset(rollbackSegmentInfo);
    hasChanges = rollbackHasChanges;
    deletedDocsDirty = rollbackDeletedDocsDirty;
    normsDirty = rollbackNormsDirty;
    pendingDeleteCount = rollbackPendingDeleteCount;
    for (Norm norm : norms.values()) {
      norm.dirty = norm.rollbackDirty;
    }
  }

  /** Returns the directory this index resides in. */
  @Override
  public Directory directory() {
    // Don't ensureOpen here -- in certain cases, when a
    // cloned/reopened reader needs to commit, it may call
    // this method on the closed original reader
    return core.dir;
  }

  // This is necessary so that cloned SegmentReaders (which
  // share the underlying postings data) will map to the
  // same entry in the FieldCache.  See LUCENE-1579.
  @Override
  public final Object getCoreCacheKey() {
    return core;
  }

  /**
   * Lots of tests did hacks like:<br/>
   * SegmentReader reader = (SegmentReader) IndexReader.open(dir);<br/>
   * They broke.  This method serves as a hack to keep hacks working.
   * We do it with R/W access for the tests (BW compatibility).
   * @deprecated Remove this when tests are fixed!
   */
  @Deprecated
  static SegmentReader getOnlySegmentReader(Directory dir) throws IOException {
    return getOnlySegmentReader(IndexReader.open(dir, false));
  }

  static SegmentReader getOnlySegmentReader(IndexReader reader) {
    if (reader instanceof SegmentReader)
      return (SegmentReader) reader;

    if (reader instanceof DirectoryReader) {
      IndexReader[] subReaders = reader.getSequentialSubReaders();
      if (subReaders.length != 1)
        throw new IllegalArgumentException(reader + " has " + subReaders.length + " segments instead of exactly one");

      return (SegmentReader) subReaders[0];
    }

    throw new IllegalArgumentException(reader + " is not a SegmentReader or a single-segment DirectoryReader");
  }

  @Override
  public int getTermInfosIndexDivisor() {
    return core.termsIndexDivisor;
  }
}
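
// A minimal usage sketch (kept as a comment): one way a caller could obtain a
// read-only SegmentReader for a single segment through the static get(...)
// factory above.  The Directory "dir" and SegmentInfo "si" are assumed to come
// from a SegmentInfos read from an existing index; the terms-index divisor of 1
// and the null CodecProvider (which falls back to CodecProvider.getDefault())
// are assumptions for illustration, not requirements.
//
//   SegmentReader sr = SegmentReader.get(true,                            // readOnly
//                                         dir,                            // index directory
//                                         si,                             // segment to open
//                                         BufferedIndexInput.BUFFER_SIZE, // readBufferSize
//                                         true,                           // doOpenStores
//                                         1,                              // termInfosIndexDivisor
//                                         null);                          // default CodecProvider
//   try {
//     int live = sr.numDocs();   // maxDoc() minus deleted docs
//   } finally {
//     sr.close();                // releases the shared core readers via decRef
//   }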