package org.apache.lucene.index; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.Closeable; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.search.FieldCache; // javadocs import org.apache.lucene.search.Similarity; import org.apache.lucene.store.*; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ReaderUtil; // for javadocs import org.apache.lucene.util.VirtualMethod; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.IndexSchema; import org.apache.solr.search.DocSet; import org.apache.solr.search.SolrIndexSearcher; /** IndexReader is an abstract class, providing an interface for accessing an index. Search of an index is done entirely through this abstract interface, so that any subclass which implements it is searchable. <p> Concrete subclasses of IndexReader are usually constructed with a call to one of the static <code>open()</code> methods, e.g. {@link #open(Directory, boolean)}. <p> For efficiency, in this API documents are often referred to via <i>document numbers</i>, non-negative integers which each name a unique document in the index. These document numbers are ephemeral--they may change as documents are added to and deleted from an index. Clients should thus not rely on a given document having the same number between sessions. <p> An IndexReader can be opened on a directory for which an IndexWriter is opened already, but it cannot be used to delete documents from the index then. <p> <b>NOTE</b>: for backwards API compatibility, several methods are not listed as abstract, but have no useful implementations in this base class and instead always throw UnsupportedOperationException. Subclasses are strongly encouraged to override these methods, but in many cases may not need to. </p> <p> <b>NOTE</b>: as of 2.4, it's possible to open a read-only IndexReader using the static open methods that accept the boolean readOnly parameter. Such a reader has better concurrency as it's not necessary to synchronize on the isDeleted method. You must specify false if you want to make changes with the resulting IndexReader. </p> <a name="thread-safety"></a><p><b>NOTE</b>: {@link IndexReader} instances are completely thread safe, meaning multiple threads can call any of its methods, concurrently. 
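 <p>For illustration only, a minimal sketch of the usual pattern: one reader is opened
 once (read-only) and then shared by any number of search threads without extra locking
 (the index path is a placeholder):
 <pre>
 Directory dir = FSDirectory.open(new File("/path/to/index"));
 final IndexReader reader = IndexReader.open(dir, true);
 // many threads may now call reader.document(n), reader.termDocs(t), etc. concurrently
 ...
 reader.close(); // close once, after all threads are done with it
 </pre>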
If your application requires external synchronization, you should <b>not</b> synchronize on the <code>IndexReader</code> instance; use your own (non-Lucene) objects instead. */ public abstract class IndexReader implements Cloneable,Closeable { public static class InvertParams{ public SolrIndexSearcher searcher; public SolrParams params; public SolrQueryRequest req; public String[] fields; public DocSet base; public boolean isdetail; public void setDocset(DocSet base) { this.base=base; } public DocSet getDocset() { return this.base; } } public static class InvertResult{ ArrayList<NamedList> result=new ArrayList<NamedList>(); public void setParams(IndexSchema schema,InvertParams params){ } public void merge(InvertResult r) { result.addAll(r.result); } public void setNamelist(NamedList nl) { result.add(nl); } public ArrayList<NamedList> getResult(){ return result; } } public abstract InvertResult invertScan(IndexSchema schema,InvertParams params) throws Exception; public abstract int getMaxInterval() throws Exception; /** * A custom listener that's invoked when the IndexReader * is finished. * * <p>For a SegmentReader, this listener is called only * once all SegmentReaders sharing the same core are * closed. At this point it is safe for apps to evict * this reader from any caches keyed on {@link * #getCoreCacheKey}. This is the same interface that * {@link FieldCache} uses, internally, to evict * entries.</p> * * <p>For other readers, this listener is called when they * are closed.</p> * * @lucene.experimental */ public static interface ReaderFinishedListener { public void finished(IndexReader reader); } // Impls must set this if they may call add/removeReaderFinishedListener: protected volatile Collection<ReaderFinishedListener> readerFinishedListeners; /** Expert: adds a {@link ReaderFinishedListener}. The * provided listener is also added to any sub-readers, if * this is a composite reader. Also, any reader reopened * or cloned from this one will also copy the listeners at * the time of reopen. * * @lucene.experimental */ public void addReaderFinishedListener(ReaderFinishedListener listener) { ensureOpen(); readerFinishedListeners.add(listener); } /** Expert: remove a previously added {@link ReaderFinishedListener}. * * @lucene.experimental */ public void removeReaderFinishedListener(ReaderFinishedListener listener) { ensureOpen(); readerFinishedListeners.remove(listener); } protected void notifyReaderFinishedListeners() { // Defensive (should never be null -- all impls must set // this): if (readerFinishedListeners != null) { for(ReaderFinishedListener listener : readerFinishedListeners) { listener.finished(this); } } } protected void readerFinished() { notifyReaderFinishedListeners(); } /** * Constants describing field properties, for example used for * {@link IndexReader#getFieldNames(FieldOption)}. */ public static enum FieldOption { /** All fields */ ALL, /** All indexed fields */ INDEXED, /** All fields that store payloads */ STORES_PAYLOADS, /** All fields that omit tf */ OMIT_TERM_FREQ_AND_POSITIONS, /** All fields that omit positions */ OMIT_POSITIONS, /** All fields which are not indexed */ UNINDEXED, /** All fields which are indexed with termvectors enabled */ INDEXED_WITH_TERMVECTOR, /** All fields which are indexed but don't have termvectors enabled */ INDEXED_NO_TERMVECTOR, /** All fields with termvectors enabled. 
Please note that only standard termvector fields are returned */ TERMVECTOR, /** All fields with termvectors with position values enabled */ TERMVECTOR_WITH_POSITION, /** All fields with termvectors with offset values enabled */ TERMVECTOR_WITH_OFFSET, /** All fields with termvectors with offset values and position values enabled */ TERMVECTOR_WITH_POSITION_OFFSET, } private volatile boolean closed; protected boolean hasChanges; private final AtomicInteger refCount = new AtomicInteger(); static int DEFAULT_TERMS_INDEX_DIVISOR = 1; /** Expert: returns the current refCount for this reader */ public int getRefCount() { return refCount.get(); } /** * Expert: increments the refCount of this IndexReader * instance. RefCounts are used to determine when a * reader can be closed safely, i.e. as soon as there are * no more references. Be sure to always call a * corresponding {@link #decRef}, in a finally clause; * otherwise the reader may never be closed. Note that * {@link #close} simply calls decRef(), which means that * the IndexReader will not really be closed until {@link * #decRef} has been called for all outstanding * references. * * @see #decRef * @see #tryIncRef */ public void incRef() { ensureOpen(); refCount.incrementAndGet(); } /** * Expert: increments the refCount of this IndexReader * instance only if the IndexReader has not been closed yet * and returns <code>true</code> iff the refCount was * successfully incremented, otherwise <code>false</code>. * If this method returns <code>false</code> the reader is either * already closed or is currently been closed. Either way this * reader instance shouldn't be used by an application unless * <code>true</code> is returned. * <p> * RefCounts are used to determine when a * reader can be closed safely, i.e. as soon as there are * no more references. Be sure to always call a * corresponding {@link #decRef}, in a finally clause; * otherwise the reader may never be closed. Note that * {@link #close} simply calls decRef(), which means that * the IndexReader will not really be closed until {@link * #decRef} has been called for all outstanding * references. * * @see #decRef * @see #incRef */ public boolean tryIncRef() { int count; while ((count = refCount.get()) > 0) { if (refCount.compareAndSet(count, count+1)) { return true; } } return false; } /** {@inheritDoc} */ @Override public String toString() { final StringBuilder buffer = new StringBuilder(); if (hasChanges) { buffer.append('*'); } buffer.append(getClass().getSimpleName()); buffer.append('('); final IndexReader[] subReaders = getSequentialSubReaders(); if ((subReaders != null) && (subReaders.length > 0)) { buffer.append(subReaders[0]); for (int i = 1; i < subReaders.length; ++i) { buffer.append(" ").append(subReaders[i]); } } buffer.append(')'); return buffer.toString(); } /** * Expert: decreases the refCount of this IndexReader * instance. If the refCount drops to 0, then pending * changes (if any) are committed to the index and this * reader is closed. If an exception is hit, the refCount * is unchanged. 
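 *
 * <p>For illustration, a sketch of the usual reference-counting pattern
 * (the work done while the reference is held is a placeholder):
 * <pre>
 * reader.incRef();
 * try {
 *   // ... use the reader ...
 * } finally {
 *   reader.decRef(); // releases the reference even if an exception was thrown
 * }
 * </pre>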
* * @throws IOException in case an IOException occurs in commit() or doClose() * * @see #incRef */ public void decRef() throws IOException { ensureOpen(); final int rc = refCount.getAndDecrement(); if (rc == 1) { boolean success = false; try { commit(); doClose(); success = true; } finally { if (!success) { // Put reference back on failure refCount.incrementAndGet(); } } readerFinished(); } else if (rc <= 0) { throw new IllegalStateException("too many decRef calls: refCount was " + rc + " before decrement"); } } protected IndexReader() { refCount.set(1); } /** * @throws AlreadyClosedException if this IndexReader is closed */ protected final void ensureOpen() throws AlreadyClosedException { if (refCount.get() <= 0) { throw new AlreadyClosedException("this IndexReader is closed"); } } /** Returns a IndexReader reading the index in the given * Directory, with readOnly=true. * @param directory the index directory * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException { return open(directory, null, null, true, DEFAULT_TERMS_INDEX_DIVISOR); } /** Returns an IndexReader reading the index in the given * Directory. You should pass readOnly=true, since it * gives much better concurrent performance, unless you * intend to do write operations (delete documents or * change norms) with the reader. * @param directory the index directory * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory, boolean readOnly) throws CorruptIndexException, IOException { return open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } /** * Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}. * * @param writer The IndexWriter to open from * @param applyAllDeletes If true, all buffered deletes will * be applied (made visible) in the returned reader. If * false, the deletes are not applied but remain buffered * (in IndexWriter) so that they will be applied in the * future. Applying deletes can be costly, so if your app * can tolerate deleted documents being returned you might * gain some performance by passing false. * @return The new IndexReader * @throws CorruptIndexException * @throws IOException if there is a low-level IO error * * @see #openIfChanged(IndexReader,IndexWriter,boolean) * * @lucene.experimental */ public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException { return writer.getReader(applyAllDeletes); } /** Expert: returns an IndexReader reading the index in the given * {@link IndexCommit}. You should pass readOnly=true, since it * gives much better concurrent performance, unless you * intend to do write operations (delete documents or * change norms) with the reader. 
* @param commit the commit point to open * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit, boolean readOnly) throws CorruptIndexException, IOException { return open(commit.getDirectory(), null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns an IndexReader reading the index in * the given Directory, with a custom {@link * IndexDeletionPolicy}. You should pass readOnly=true, * since it gives much better concurrent performance, * unless you intend to do write operations (delete * documents or change norms) with the reader. * @param directory the index directory * @param deletionPolicy a custom deletion policy (only used * if you use this reader to perform deletes or to set * norms); see {@link IndexWriter} for details. * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException { return open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns an IndexReader reading the index in * the given Directory, with a custom {@link * IndexDeletionPolicy}. You should pass readOnly=true, * since it gives much better concurrent performance, * unless you intend to do write operations (delete * documents or change norms) with the reader. * @param directory the index directory * @param deletionPolicy a custom deletion policy (only used * if you use this reader to perform deletes or to set * norms); see {@link IndexWriter} for details. * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader * @param termInfosIndexDivisor Subsamples which indexed * terms are loaded into RAM. This has the same effect as {@link * IndexWriter#setTermIndexInterval} except that setting * must be done at indexing time while this setting can be * set per reader. When set to N, then one in every * N*termIndexInterval terms in the index is loaded into * memory. By setting this to a value > 1 you can reduce * memory usage, at the expense of higher latency when * loading a TermInfo. The default value is 1. Set this * to -1 to skip loading the terms index entirely. * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { return open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor); } /** Expert: returns an IndexReader reading the index in * the given Directory, using a specific commit and with * a custom {@link IndexDeletionPolicy}. You should pass * readOnly=true, since it gives much better concurrent * performance, unless you intend to do write operations * (delete documents or change norms) with the reader. 
* @param commit the specific {@link IndexCommit} to open; * see {@link IndexReader#listCommits} to list all commits * in a directory * @param deletionPolicy a custom deletion policy (only used * if you use this reader to perform deletes or to set * norms); see {@link IndexWriter} for details. * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException { return open(commit.getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns an IndexReader reading the index in * the given Directory, using a specific commit and with * a custom {@link IndexDeletionPolicy}. You should pass * readOnly=true, since it gives much better concurrent * performance, unless you intend to do write operations * (delete documents or change norms) with the reader. * @param commit the specific {@link IndexCommit} to open; * see {@link IndexReader#listCommits} to list all commits * in a directory * @param deletionPolicy a custom deletion policy (only used * if you use this reader to perform deletes or to set * norms); see {@link IndexWriter} for details. * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader * @param termInfosIndexDivisor Subsamples which indexed * terms are loaded into RAM. This has the same effect as {@link * IndexWriter#setTermIndexInterval} except that setting * must be done at indexing time while this setting can be * set per reader. When set to N, then one in every * N*termIndexInterval terms in the index is loaded into * memory. By setting this to a value > 1 you can reduce * memory usage, at the expense of higher latency when * loading a TermInfo. The default value is 1. Set this * to -1 to skip loading the terms index entirely. This is only useful in * advanced situations when you will only .next() through all terms; * attempts to seek will hit an exception. * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { return open(commit.getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor); } private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { return DirectoryReader.open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor); } /** * If the index has changed since the provided reader was * opened, open and return a new reader; else, return * null. The new reader, if not null, will be the same * type of reader as the previous one, ie an NRT reader * will open a new NRT reader, a MultiReader will open a * new MultiReader, etc. * * <p>This method is typically far less costly than opening a * fully new <code>IndexReader</code> as it shares * resources (for example sub-readers) with the provided * <code>IndexReader</code>, when possible. * * <p>The provided reader is not closed (you are responsible * for doing so); if a new reader is returned you also * must eventually close it. 
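 * A typical refresh sequence, shown here only as a sketch:
 * <pre>
 * IndexReader newReader = IndexReader.openIfChanged(reader);
 * if (newReader != null) {   // the index has changed
 *   reader.close();          // close the old reader (once no other thread uses it)
 *   reader = newReader;
 * }
 * </pre>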
Be sure to never close a * reader while other threads are still using it; see * <code>SearcherManager</code> in * <code>contrib/misc</code> to simplify managing this. * * <p>If a new reader is returned, it's safe to make changes * (deletions, norms) with it. All shared mutable state * with the old reader uses "copy on write" semantics to * ensure the changes are not seen by other readers. * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error * @return null if there are no changes; else, a new * IndexReader instance which you must eventually close */ public static IndexReader openIfChanged(IndexReader oldReader) throws IOException { if (oldReader.hasNewReopenAPI1) { final IndexReader newReader = oldReader.doOpenIfChanged(); assert newReader != oldReader; return newReader; } else { final IndexReader newReader = oldReader.reopen(); if (newReader == oldReader) { return null; } else { return newReader; } } } /** * If the index has changed since the provided reader was * opened, open and return a new reader, with the * specified <code>readOnly</code>; else, return * null. * * @see #openIfChanged(IndexReader) */ public static IndexReader openIfChanged(IndexReader oldReader, boolean readOnly) throws IOException { if (oldReader.hasNewReopenAPI2) { final IndexReader newReader = oldReader.doOpenIfChanged(readOnly); assert newReader != oldReader; return newReader; } else { final IndexReader newReader = oldReader.reopen(readOnly); if (newReader == oldReader) { return null; } else { return newReader; } } } /** * If the IndexCommit differs from what the * provided reader is searching, or the provided reader is * not already read-only, open and return a new * <code>readOnly=true</code> reader; else, return null. * * @see #openIfChanged(IndexReader) */ // TODO: should you be able to specify readOnly? public static IndexReader openIfChanged(IndexReader oldReader, IndexCommit commit) throws IOException { if (oldReader.hasNewReopenAPI3) { final IndexReader newReader = oldReader.doOpenIfChanged(commit); assert newReader != oldReader; return newReader; } else { final IndexReader newReader = oldReader.reopen(commit); if (newReader == oldReader) { return null; } else { return newReader; } } } /** * Expert: If there are changes (committed or not) in the * {@link IndexWriter} versus what the provided reader is * searching, then open and return a new read-only * IndexReader searching both committed and uncommitted * changes from the writer; else, return null (though the * current implementation never returns null). * * <p>This provides "near real-time" searching, in that * changes made during an {@link IndexWriter} session can be * quickly made available for searching without closing * the writer nor calling {@link #commit}. * * <p>It's <i>near</i> real-time because there is no hard * guarantee on how quickly you can get a new reader after * making changes with IndexWriter. You'll have to * experiment in your situation to determine if it's * fast enough. As this is a new and experimental * feature, please report back on your findings so we can * learn, improve and iterate.</p> * * <p>The very first time this method is called, this * writer instance will make every effort to pool the * readers that it opens for doing merges, applying * deletes, etc.
This means additional resources (RAM, * file descriptors, CPU time) will be consumed.</p> * * <p>For lower latency on reopening a reader, you should * call {@link IndexWriterConfig#setMergedSegmentWarmer} to * pre-warm a newly merged segment before it's committed * to the index. This is important for minimizing * index-to-search delay after a large merge. </p> * * <p>If an addIndexes* call is running in another thread, * then this reader will only search those segments from * the foreign index that have been successfully copied * over, so far.</p> * * <p><b>NOTE</b>: Once the writer is closed, any * outstanding readers may continue to be used. However, * if you attempt to reopen any of those readers, you'll * hit an {@link AlreadyClosedException}.</p> * * @return IndexReader that covers entire index plus all * changes made so far by this IndexWriter instance, or * null if there are no new changes * * @param writer The IndexWriter to open from * * @param applyAllDeletes If true, all buffered deletes will * be applied (made visible) in the returned reader. If * false, the deletes are not applied but remain buffered * (in IndexWriter) so that they will be applied in the * future. Applying deletes can be costly, so if your app * can tolerate deleted documents being returned you might * gain some performance by passing false. * * @throws IOException * * @lucene.experimental */ public static IndexReader openIfChanged(IndexReader oldReader, IndexWriter writer, boolean applyAllDeletes) throws IOException { if (oldReader.hasNewReopenAPI4) { final IndexReader newReader = oldReader.doOpenIfChanged(writer, applyAllDeletes); assert newReader != oldReader; return newReader; } else { final IndexReader newReader = oldReader.reopen(writer, applyAllDeletes); if (newReader == oldReader) { return null; } else { return newReader; } } } /** * Refreshes an IndexReader if the index has changed since this instance * was (re)opened. * <p> * Opening an IndexReader is an expensive operation. This method can be used * to refresh an existing IndexReader to reduce these costs. This method * tries to only load segments that have changed or were created after the * IndexReader was (re)opened. * <p> * If the index has not changed since this instance was (re)opened, then this * call is a NOOP and returns this instance. Otherwise, a new instance is * returned. The old instance is <b>not</b> closed and remains usable.<br> * <p> * If the reader is reopened, even though they share * resources internally, it's safe to make changes * (deletions, norms) with the new reader. All shared * mutable state obeys "copy on write" semantics to ensure * the changes are not seen by other readers. * <p> * You can determine whether a reader was actually reopened by comparing the * old instance with the instance returned by this method: * <pre> * IndexReader reader = ... * ... * IndexReader newReader = reader.reopen(); * if (newReader != reader) { * ... // reader was reopened * reader.close(); * } * reader = newReader; * ... * </pre> * * Be sure to synchronize that code so that other threads, * if present, can never use reader after it has been * closed and before it's switched to newReader. * * <p><b>NOTE</b>: If this reader is a near real-time * reader (obtained from {@link IndexWriter#getReader()}), * reopen() will simply call writer.getReader() again for * you, though this may change in the future.
* * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error * @deprecated Use IndexReader#openIfChanged(IndexReader) instead */ @Deprecated public IndexReader reopen() throws CorruptIndexException, IOException { final IndexReader newReader = IndexReader.openIfChanged(this); if (newReader == null) { return this; } else { return newReader; } } /** Just like {@link #reopen()}, except you can change the * readOnly of the original reader. If the index is * unchanged but readOnly is different then a new reader * will be returned. * @deprecated Use * IndexReader#openIfChanged(IndexReader,boolean) instead */ @Deprecated public IndexReader reopen(boolean openReadOnly) throws CorruptIndexException, IOException { final IndexReader newReader = IndexReader.openIfChanged(this, openReadOnly); if (newReader == null) { return this; } else { return newReader; } } /** Expert: reopen this reader on a specific commit point. * This always returns a readOnly reader. If the * specified commit point matches what this reader is * already on, and this reader is already readOnly, then * this same instance is returned; if it is not already * readOnly, a readOnly clone is returned. * @deprecated Use IndexReader#openIfChanged(IndexReader,IndexCommit) instead */ @Deprecated public IndexReader reopen(IndexCommit commit) throws CorruptIndexException, IOException { final IndexReader newReader = IndexReader.openIfChanged(this, commit); if (newReader == null) { return this; } else { return newReader; } } /** * Expert: returns a readonly reader, covering all * committed as well as un-committed changes to the index. * This provides "near real-time" searching, in that * changes made during an IndexWriter session can be * quickly made available for searching without closing * the writer nor calling {@link #commit}. * * <p>Note that this is functionally equivalent to calling * {#flush} (an internal IndexWriter operation) and then using {@link IndexReader#open} to * open a new reader. But the turnaround time of this * method should be faster since it avoids the potentially * costly {@link #commit}.</p> * * <p>You must close the {@link IndexReader} returned by * this method once you are done using it.</p> * * <p>It's <i>near</i> real-time because there is no hard * guarantee on how quickly you can get a new reader after * making changes with IndexWriter. You'll have to * experiment in your situation to determine if it's * fast enough. As this is a new and experimental * feature, please report back on your findings so we can * learn, improve and iterate.</p> * * <p>The resulting reader supports {@link * IndexReader#reopen}, but that call will simply forward * back to this method (though this may change in the * future).</p> * * <p>The very first time this method is called, this * writer instance will make every effort to pool the * readers that it opens for doing merges, applying * deletes, etc. This means additional resources (RAM, * file descriptors, CPU time) will be consumed.</p> * * <p>For lower latency on reopening a reader, you should * call {@link IndexWriterConfig#setMergedSegmentWarmer} to * pre-warm a newly merged segment before it's committed * to the index. This is important for minimizing * index-to-search delay after a large merge. </p> * * <p>If an addIndexes* call is running in another thread, * then this reader will only search those segments from * the foreign index that have been successfully copied * over, so far</p>. 
* * <p><b>NOTE</b>: Once the writer is closed, any * outstanding readers may continue to be used. However, * if you attempt to reopen any of those readers, you'll * hit an {@link AlreadyClosedException}.</p> * * @return IndexReader that covers entire index plus all * changes made so far by this IndexWriter instance * * @param writer The IndexWriter to open from * @param applyAllDeletes If true, all buffered deletes will * be applied (made visible) in the returned reader. If * false, the deletes are not applied but remain buffered * (in IndexWriter) so that they will be applied in the * future. Applying deletes can be costly, so if your app * can tolerate deleted documents being returned you might * gain some performance by passing false. * * @throws IOException * * @lucene.experimental * @deprecated Use IndexReader#openIfChanged(IndexReader,IndexWriter,boolean) instead */ @Deprecated public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException { final IndexReader newReader = IndexReader.openIfChanged(this, writer, applyAllDeletes); if (newReader == null) { return this; } else { return newReader; } } /** * If the index has changed since it was opened, open and return a new reader; * else, return {@code null}. * * @see #openIfChanged(IndexReader) */ protected IndexReader doOpenIfChanged() throws CorruptIndexException, IOException { throw new UnsupportedOperationException("This reader does not support reopen()."); } /** * If the index has changed since it was opened, open and return a new reader; * else, return {@code null}. * * @see #openIfChanged(IndexReader, boolean) */ protected IndexReader doOpenIfChanged(boolean openReadOnly) throws CorruptIndexException, IOException { throw new UnsupportedOperationException("This reader does not support reopen()."); } /** * If the index has changed since it was opened, open and return a new reader; * else, return {@code null}. * * @see #openIfChanged(IndexReader, IndexCommit) */ protected IndexReader doOpenIfChanged(final IndexCommit commit) throws CorruptIndexException, IOException { throw new UnsupportedOperationException("This reader does not support reopen(IndexCommit)."); } /** * If the index has changed since it was opened, open and return a new reader; * else, return {@code null}. * * @see #openIfChanged(IndexReader, IndexWriter, boolean) */ protected IndexReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException { return writer.getReader(applyAllDeletes); } /** * Efficiently clones the IndexReader (sharing most * internal state). * <p> * On cloning a reader with pending changes (deletions, * norms), the original reader transfers its write lock to * the cloned reader. This means only the cloned reader * may make further changes to the index, and commit the * changes to the index on close, but the old reader still * reflects all changes made up until it was cloned. * <p> * Like {@link #openIfChanged(IndexReader)}, it's safe to make changes to * either the original or the cloned reader: all shared * mutable state obeys "copy on write" semantics to ensure * the changes are not seen by other readers. * <p> */ @Override public synchronized Object clone() { throw new UnsupportedOperationException("This reader does not implement clone()"); } /** * Clones the IndexReader and optionally changes readOnly. A readOnly * reader cannot open a writeable reader.
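 * For illustration, a sketch of taking a read-only view of a reader that
 * was opened writeable:
 * <pre>
 * IndexReader readOnlyView = reader.clone(true);
 * </pre>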
* @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public synchronized IndexReader clone(boolean openReadOnly) throws CorruptIndexException, IOException { throw new UnsupportedOperationException("This reader does not implement clone()"); } /** * Returns the directory associated with this index. The Default * implementation returns the directory specified by subclasses when * delegating to the IndexReader(Directory) constructor, or throws an * UnsupportedOperationException if one was not specified. * @throws UnsupportedOperationException if no directory */ public Directory directory() { ensureOpen(); throw new UnsupportedOperationException("This reader does not support this method."); } /** * Returns the time the index in the named directory was last modified. * Do not use this to check whether the reader is still up-to-date, use * {@link #isCurrent()} instead. * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static long lastModified(final Directory directory2) throws CorruptIndexException, IOException { return ((Long) new SegmentInfos.FindSegmentsFile(directory2) { @Override public Object doBody(String segmentFileName) throws IOException { return Long.valueOf(directory2.fileModified(segmentFileName)); } }.run()).longValue(); } /** * Reads version number from segments files. The version number is * initialized with a timestamp and then increased by one for each change of * the index. * * @param directory where the index resides. * @return version number. * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static long getCurrentVersion(Directory directory) throws CorruptIndexException, IOException { return SegmentInfos.readCurrentVersion(directory); } /** * Reads commitUserData, previously passed to {@link * IndexWriter#commit(Map)}, from current index * segments file. This will return null if {@link * IndexWriter#commit(Map)} has never been called for * this index. * * @param directory where the index resides. * @return commit userData. * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error * * @see #getCommitUserData() */ public static Map<String,String> getCommitUserData(Directory directory) throws CorruptIndexException, IOException { return SegmentInfos.readCurrentUserData(directory); } /** * Version number when this IndexReader was opened. Not * implemented in the IndexReader base class. * * <p>If this reader is based on a Directory (ie, was * created by calling {@link #open}, or {@link #openIfChanged} on * a reader based on a Directory), then this method * returns the version recorded in the commit that the * reader opened. This version is advanced every time * {@link IndexWriter#commit} is called.</p> * * <p>If instead this reader is a near real-time reader * (ie, obtained by a call to {@link * IndexWriter#getReader}, or by calling {@link #openIfChanged} * on a near real-time reader), then this method returns * the version of the last commit done by the writer. * Note that even as further changes are made with the * writer, the version will not changed until a commit is * completed. Thus, you should not rely on this method to * determine when a near real-time reader should be * opened. 
Use {@link #isCurrent} instead.</p> * * @throws UnsupportedOperationException unless overridden in subclass */ public long getVersion() { throw new UnsupportedOperationException("This reader does not support this method."); } /** * Retrieve the String userData optionally passed to * IndexWriter#commit. This will return null if {@link * IndexWriter#commit(Map)} has never been called for * this index. * * @see #getCommitUserData(Directory) */ public Map<String,String> getCommitUserData() { throw new UnsupportedOperationException("This reader does not support this method."); } /** * Check whether any new changes have occurred to the * index since this reader was opened. * * <p>If this reader is based on a Directory (ie, was * created by calling {@link #open}, or {@link #openIfChanged} on * a reader based on a Directory), then this method checks * if any further commits (see {@link IndexWriter#commit}) * have occurred in that directory.</p> * * <p>If instead this reader is a near real-time reader * (ie, obtained by a call to {@link * IndexWriter#getReader}, or by calling {@link #openIfChanged} * on a near real-time reader), then this method checks if * either a new commit has occurred, or any new * uncommitted changes have taken place via the writer. * Note that even if the writer has only performed * merging, this method will still return false.</p> * * <p>In any event, if this returns false, you should call * {@link #openIfChanged} to get a new reader that sees the * changes.</p> * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error * @throws UnsupportedOperationException unless overridden in subclass */ public boolean isCurrent() throws CorruptIndexException, IOException { throw new UnsupportedOperationException("This reader does not support this method."); } /** @deprecated Check segment count using {@link * #getSequentialSubReaders} instead. */ @Deprecated public boolean isOptimized() { throw new UnsupportedOperationException("This reader does not support this method."); } /** * Return an array of term frequency vectors for the specified document. * The array contains a vector for each vectorized field in the document. * Each vector contains terms and frequencies for all terms in a given vectorized field. * If no such fields existed, the method returns null. The term vectors that are * returned may either be of type {@link TermFreqVector} * or of type {@link TermPositionVector} if * positions or offsets have been stored. * * @param docNumber document for which term frequency vectors are returned * @return array of term frequency vectors. May be null if no term vectors have been * stored for the specified document. * @throws IOException if index cannot be accessed * @see org.apache.lucene.document.Field.TermVector */ abstract public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException; /** * Return a term frequency vector for the specified document and field. The * returned vector contains terms and frequencies for the terms in * the specified field of this document, if the field had the storeTermVector * flag set. If termvectors had been stored with positions or offsets, a * {@link TermPositionVector} is returned. * * @param docNumber document for which the term frequency vector is returned * @param field field for which the term frequency vector is returned. * @return term frequency vector. May be null if field does not exist in the specified * document or term vector was not stored.
* @throws IOException if index cannot be accessed * @see org.apache.lucene.document.Field.TermVector */ abstract public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException; /** * Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of * the {@link TermFreqVector}. * @param docNumber The number of the document to load the vector for * @param field The name of the field to load * @param mapper The {@link TermVectorMapper} to process the vector. Must not be null * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. * */ abstract public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException; /** * Map all the term vectors for all fields in a Document * @param docNumber The number of the document to load the vector for * @param mapper The {@link TermVectorMapper} to process the vector. Must not be null * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. */ abstract public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException; /** * Returns <code>true</code> if an index exists at the specified directory. * @param directory the directory to check for an index * @return <code>true</code> if an index exists; <code>false</code> otherwise * @throws IOException if there is a problem with accessing the index */ public static boolean indexExists(Directory directory) throws IOException { try { new SegmentInfos().read(directory); return true; } catch (IOException ioe) { return false; } } /** Returns the number of documents in this index. */ public abstract int numDocs(); /** Returns one greater than the largest possible document number. * This may be used to, e.g., determine how big to allocate an array which * will have an element for every document number in an index. */ public abstract int maxDoc(); /** Returns the number of deleted documents. */ public int numDeletedDocs() { return maxDoc() - numDocs(); } /** * Returns the stored fields of the <code>n</code><sup>th</sup> * <code>Document</code> in this index. * <p> * <b>NOTE:</b> for performance reasons, this method does not check if the * requested document is deleted, and therefore asking for a deleted document * may yield unspecified results. Usually this is not required, however you * can call {@link #isDeleted(int)} with the requested document ID to verify * the document is not deleted. * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public Document document(int n) throws CorruptIndexException, IOException { ensureOpen(); if (n < 0 || n >= maxDoc()) { throw new IllegalArgumentException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + n + ")"); } return document(n, null); } /** * Get the {@link org.apache.lucene.document.Document} at the <code>n</code> * <sup>th</sup> position. The {@link FieldSelector} may be used to determine * what {@link org.apache.lucene.document.Field}s to load and how they should * be loaded. <b>NOTE:</b> If this Reader (more specifically, the underlying * <code>FieldsReader</code>) is closed before the lazy * {@link org.apache.lucene.document.Field} is loaded an exception may be * thrown. If you want the value of a lazy * {@link org.apache.lucene.document.Field} to be available after closing you * must explicitly load it or fetch the Document again with a new loader. 
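 * <p>For illustration, a sketch that loads one stored field eagerly and one
 * lazily (the field names are placeholders; see the field-selector classes
 * referenced below):
 * <pre>
 * Set fieldsToLoad = new HashSet();
 * fieldsToLoad.add("title");
 * Set lazyFields = new HashSet();
 * lazyFields.add("body");
 * FieldSelector selector = new SetBasedFieldSelector(fieldsToLoad, lazyFields);
 * Document doc = reader.document(docId, selector); // "body" is loaded lazily
 * </pre>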
* <p> * <b>NOTE:</b> for performance reasons, this method does not check if the * requested document is deleted, and therefore asking for a deleted document * may yield unspecified results. Usually this is not required, however you * can call {@link #isDeleted(int)} with the requested document ID to verify * the document is not deleted. * * @param n Get the document at the <code>n</code><sup>th</sup> position * @param fieldSelector The {@link FieldSelector} to use to determine what * Fields should be loaded on the Document. May be null, in which case * all Fields will be loaded. * @return The stored fields of the * {@link org.apache.lucene.document.Document} at the nth position * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error * @see org.apache.lucene.document.Fieldable * @see org.apache.lucene.document.FieldSelector * @see org.apache.lucene.document.SetBasedFieldSelector * @see org.apache.lucene.document.LoadFirstFieldSelector */ // TODO (1.5): When we convert to JDK 1.5 make this Set<String> public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException; /** Returns true if document <i>n</i> has been deleted */ public abstract boolean isDeleted(int n); /** Returns true if any documents have been deleted */ public abstract boolean hasDeletions(); /** Returns true if there are norms stored for this field. */ public boolean hasNorms(String field) throws IOException { // backward compatible implementation. // SegmentReader has an efficient implementation. ensureOpen(); return norms(field) != null; } /** Returns the byte-encoded normalization factor for the named field of * every document. This is used by the search code to score documents. * Returns null if norms were not indexed for this field. * * @see org.apache.lucene.document.Field#setBoost(float) */ public abstract byte[] norms(String field) throws IOException; /** Reads the byte-encoded normalization factor for the named field of every * document. This is used by the search code to score documents. * * @see org.apache.lucene.document.Field#setBoost(float) */ public abstract void norms(String field, byte[] bytes, int offset) throws IOException; /** Expert: Resets the normalization factor for the named field of the named * document. The norm represents the product of the field's {@link * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, * int) length normalization}. Thus, to preserve the length normalization * values when resetting this, one should base the new value upon the old. * * <b>NOTE:</b> If this field does not index norms, then * this method throws {@link IllegalStateException}. 
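 *
 * <p>For illustration, a sketch that doubles one document's norm for a field,
 * using the current {@link Similarity} to decode and re-encode the byte value
 * (the field name and docId are placeholders):
 * <pre>
 * byte oldNorm = reader.norms("title")[docId];
 * float boosted = Similarity.getDefault().decodeNormValue(oldNorm) * 2.0f;
 * reader.setNorm(docId, "title", Similarity.getDefault().encodeNormValue(boosted));
 * </pre>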
* * @see #norms(String) * @see Similarity#decodeNormValue(byte) * @throws StaleReaderException if the index has changed * since this reader was opened * @throws CorruptIndexException if the index is corrupt * @throws LockObtainFailedException if another writer * has this index open (<code>write.lock</code> could not * be obtained) * @throws IOException if there is a low-level IO error * @throws IllegalStateException if the field does not index norms */ public synchronized void setNorm(int doc, String field, byte value) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { ensureOpen(); acquireWriteLock(); hasChanges = true; doSetNorm(doc, field, value); } /** Implements setNorm in subclass.*/ protected abstract void doSetNorm(int doc, String field, byte value) throws CorruptIndexException, IOException; /** Expert: Resets the normalization factor for the named field of the named * document. * * @see #norms(String) * @see Similarity#decodeNormValue(byte) * * @throws StaleReaderException if the index has changed * since this reader was opened * @throws CorruptIndexException if the index is corrupt * @throws LockObtainFailedException if another writer * has this index open (<code>write.lock</code> could not * be obtained) * @throws IOException if there is a low-level IO error * @deprecated Use {@link #setNorm(int, String, byte)} instead, encoding the * float to byte with your Similarity's {@link Similarity#encodeNormValue(float)}. * This method will be removed in Lucene 4.0 */ @Deprecated public void setNorm(int doc, String field, float value) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { ensureOpen(); setNorm(doc, field, Similarity.getDefault().encodeNormValue(value)); } /** Returns an enumeration of all the terms in the index. The * enumeration is ordered by Term.compareTo(). Each term is greater * than all that precede it in the enumeration. Note that after * calling terms(), {@link TermEnum#next()} must be called * on the resulting enumeration before calling other methods such as * {@link TermEnum#term()}. * @throws IOException if there is a low-level IO error */ public abstract TermEnum terms() throws IOException; /** Returns an enumeration of all terms starting at a given term. If * the given term does not exist, the enumeration is positioned at the * first term greater than the supplied term. The enumeration is * ordered by Term.compareTo(). Each term is greater than all that * precede it in the enumeration. * @throws IOException if there is a low-level IO error */ public abstract TermEnum terms(Term t) throws IOException; /** Returns the number of documents containing the term <code>t</code>. * @throws IOException if there is a low-level IO error */ public abstract int docFreq(Term t) throws IOException; /** Returns an enumeration of all the documents which contain * <code>term</code>. For each document, the document number, the frequency of * the term in that document is also provided, for use in * search scoring. If term is null, then all non-deleted * docs are returned with freq=1. * Thus, this method implements the mapping: * <p><ul> * Term    =>    <docNum, freq><sup>*</sup> * </ul> * <p>The enumeration is ordered by document number. Each document number * is greater than all that precede it in the enumeration. 
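 * <p>For illustration, a sketch of walking the postings of a single term
 * (the field and term text are placeholders):
 * <pre>
 * TermDocs td = reader.termDocs(new Term("contents", "lucene"));
 * try {
 *   while (td.next()) {
 *     int doc  = td.doc();   // document number
 *     int freq = td.freq();  // occurrences of the term in that document
 *   }
 * } finally {
 *   td.close();
 * }
 * </pre>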
* @throws IOException if there is a low-level IO error */ public TermDocs termDocs(Term term, int buffer) throws IOException { ensureOpen(); TermDocs termDocs = termDocs(buffer); termDocs.seek(term); return termDocs; } public TermDocs termDocs(Term term) throws IOException { return termDocs(term, -1); } public TermDocs termDocs() throws IOException { return termDocs(-1); } /** Returns an unpositioned {@link TermDocs} enumerator. * <p> * Note: the TermDocs returned is unpositioned. Before using it, ensure * that you first position it with {@link TermDocs#seek(Term)} or * {@link TermDocs#seek(TermEnum)}. * * @throws IOException if there is a low-level IO error */ public abstract TermDocs termDocs(int buffer) throws IOException; /** Returns an enumeration of all the documents which contain * <code>term</code>. For each document, in addition to the document number * and frequency of the term in that document, a list of all of the ordinal * positions of the term in the document is available. Thus, this method * implements the mapping: * * <p><ul> * Term    =>    <docNum, freq, * <pos<sub>1</sub>, pos<sub>2</sub>, ... * pos<sub>freq-1</sub>> * ><sup>*</sup> * </ul> * <p> This positional information facilitates phrase and proximity searching. * <p>The enumeration is ordered by document number. Each document number is * greater than all that precede it in the enumeration. * @throws IOException if there is a low-level IO error */ public TermPositions termPositions(Term term) throws IOException { ensureOpen(); TermPositions termPositions = termPositions(); termPositions.seek(term); return termPositions; } /** Returns an unpositioned {@link TermPositions} enumerator. * @throws IOException if there is a low-level IO error */ public abstract TermPositions termPositions() throws IOException; /** Deletes the document numbered <code>docNum</code>. Once a document is * deleted it will not appear in TermDocs or TermPositions enumerations. * Attempts to read its field with the {@link #document} * method will result in an error. The presence of this document may still be * reflected in the {@link #docFreq} statistic, though * this will be corrected eventually as the index is further modified. * * @throws StaleReaderException if the index has changed * since this reader was opened * @throws CorruptIndexException if the index is corrupt * @throws LockObtainFailedException if another writer * has this index open (<code>write.lock</code> could not * be obtained) * @throws IOException if there is a low-level IO error */ public synchronized void deleteDocument(int docNum) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { ensureOpen(); acquireWriteLock(); hasChanges = true; doDelete(docNum); } /** Implements deletion of the document numbered <code>docNum</code>. * Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. */ protected abstract void doDelete(int docNum) throws CorruptIndexException, IOException; /** Deletes all documents that have a given <code>term</code> indexed. * This is useful if one uses a document field to hold a unique ID string for * the document. Then to delete such a document, one merely constructs a * term with the appropriate field and the unique ID string as its text and * passes it to this method. * See {@link #deleteDocument(int)} for information about when this deletion will * become effective.
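 *
 * <p>For illustration, a sketch of deleting by a unique id field (the field
 * name and value are placeholders):
 * <pre>
 * int deleted = reader.deleteDocuments(new Term("id", "42"));
 * // the deletions are persisted when flush() or close() is called
 * </pre>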
* * @return the number of documents deleted * @throws StaleReaderException if the index has changed * since this reader was opened * @throws CorruptIndexException if the index is corrupt * @throws LockObtainFailedException if another writer * has this index open (<code>write.lock</code> could not * be obtained) * @throws IOException if there is a low-level IO error */ public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { ensureOpen(); TermDocs docs = termDocs(term,-1); if (docs == null) return 0; int n = 0; try { while (docs.next()) { deleteDocument(docs.doc()); n++; } } finally { docs.close(); } return n; } /** Undeletes all documents currently marked as deleted in * this index. * * <p>NOTE: this method can only recover documents marked * for deletion but not yet removed from the index; when * and how Lucene removes deleted documents is an * implementation detail, subject to change from release * to release. However, you can use {@link * #numDeletedDocs} on the current IndexReader instance to * see how many documents will be un-deleted. * * @throws StaleReaderException if the index has changed * since this reader was opened * @throws LockObtainFailedException if another writer * has this index open (<code>write.lock</code> could not * be obtained) * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public synchronized void undeleteAll() throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { ensureOpen(); acquireWriteLock(); hasChanges = true; doUndeleteAll(); } /** Implements actual undeleteAll() in subclass. */ protected abstract void doUndeleteAll() throws CorruptIndexException, IOException; /** Does nothing by default. Subclasses that require a write lock for * index modifications must implement this method. */ protected synchronized void acquireWriteLock() throws IOException { /* NOOP */ } /** * * @throws IOException */ public final synchronized void flush() throws IOException { ensureOpen(); commit(); } /** * @param commitUserData Opaque Map (String -> String) * that's recorded into the segments file in the index, * and retrievable by {@link * IndexReader#getCommitUserData}. * @throws IOException */ public final synchronized void flush(Map<String, String> commitUserData) throws IOException { ensureOpen(); commit(commitUserData); } /** * Commit changes resulting from delete, undeleteAll, or * setNorm operations * * If an exception is hit, then either no changes or all * changes will have been committed to the index * (transactional semantics). * @throws IOException if there is a low-level IO error */ protected final synchronized void commit() throws IOException { commit(null); } /** * Commit changes resulting from delete, undeleteAll, or * setNorm operations * * If an exception is hit, then either no changes or all * changes will have been committed to the index * (transactional semantics). * @throws IOException if there is a low-level IO error */ public final synchronized void commit(Map<String, String> commitUserData) throws IOException { // Don't call ensureOpen since we commit() on close doCommit(commitUserData); hasChanges = false; } /** Implements commit. */ protected abstract void doCommit(Map<String, String> commitUserData) throws IOException; /** * Closes files associated with this index. * Also saves any new deletions to disk. * No other methods should be called after this has been called. 
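 * For illustration, the common open/use/close pattern as a sketch (the
 * Directory variable is a placeholder):
 * <pre>
 * IndexReader reader = IndexReader.open(dir, true);
 * try {
 *   // ... read from the index ...
 * } finally {
 *   reader.close(); // also flushes any pending deletions to disk
 * }
 * </pre>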
   * @throws IOException if there is a low-level IO error
   */
  public final synchronized void close() throws IOException {
    if (!closed) {
      decRef();
      closed = true;
    }
  }

  /** Implements close. */
  protected abstract void doClose() throws IOException;

  /**
   * Get a list of unique field names that exist in this index and have the specified
   * field option information.
   * @param fldOption specifies which field option should be available for the returned fields
   * @return Collection of Strings indicating the names of the fields.
   * @see IndexReader.FieldOption
   */
  public abstract Collection<String> getFieldNames(FieldOption fldOption);

  /**
   * Expert: return the IndexCommit that this reader has
   * opened. This method is only implemented by those
   * readers that correspond to a Directory with its own
   * segments_N file.
   *
   * @lucene.experimental
   */
  public IndexCommit getIndexCommit() throws IOException {
    throw new UnsupportedOperationException("This reader does not support this method.");
  }

  /**
   * Prints the filename and size of each file within a given compound file.
   * Add the -extract flag to extract files to the current working directory.
   * In order to make the extracted version of the index work, you have to copy
   * the segments file from the compound index into the directory where the extracted files are stored.
   * @param args Usage: org.apache.lucene.index.IndexReader [-extract] &lt;cfsfile&gt; &lt;cfsposfile&gt;
   */
  public static void main(String[] args) {
    String filename = null;
    String cfsposfile = null;
    boolean extract = false;

    for (int i = 0; i < args.length; ++i) {
      if (args[i].equals("-extract")) {
        extract = true;
      } else if (filename == null) {
        filename = args[i];
      } else if (cfsposfile == null) {
        cfsposfile = args[i];
      }
    }

    if (filename == null || cfsposfile == null) {
      System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile> <cfsposfile>");
      return;
    }

    Directory dir = null;
    CompoundFileReader cfr = null;

    try {
      File file = new File(filename);
      String dirname = file.getAbsoluteFile().getParent();
      filename = file.getName();
      dir = FSDirectory.open(new File(dirname));
      cfr = new CompoundFileReader(dir, filename, cfsposfile);

      String[] files = cfr.listAll();
      ArrayUtil.mergeSort(files);   // sort the array of filenames so that the output is more readable

      for (int i = 0; i < files.length; ++i) {
        long len = cfr.fileLength(files[i]);

        if (extract) {
          System.out.println("extract " + files[i] + " with " + len + " bytes to local directory...");
          IndexInput ii = cfr.openInput(files[i]);

          FileOutputStream f = new FileOutputStream(files[i]);

          // read and write with a small buffer, which is more effective than reading byte by byte
          byte[] buffer = new byte[1024];
          int chunk = buffer.length;
          while (len > 0) {
            final int bufLen = (int) Math.min(chunk, len);
            ii.readBytes(buffer, 0, bufLen);
            f.write(buffer, 0, bufLen);
            len -= bufLen;
          }

          f.close();
          ii.close();
        } else
          System.out.println(files[i] + ": " + len + " bytes");
      }
    } catch (IOException ioe) {
      ioe.printStackTrace();
    } finally {
      try {
        if (dir != null)
          dir.close();
        if (cfr != null)
          cfr.close();
      } catch (IOException ioe) {
        ioe.printStackTrace();
      }
    }
  }

  /** Returns all commit points that exist in the Directory.
   * Normally, because the default is {@link
   * KeepOnlyLastCommitDeletionPolicy}, there would be only
   * one commit point. But if you're using a custom {@link
   * IndexDeletionPolicy} then there could be many commits.
   * Once you have a given commit, you can open a reader on
   * it by calling {@link IndexReader#open(IndexCommit,boolean)}.
   * There must be at least one commit in
   * the Directory, else this method throws {@link
   * IndexNotFoundException}.
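   *
   * <p>Sketch (illustrative only, assuming <code>dir</code> is the index
   * {@link Directory}): open a read-only reader on the oldest commit still
   * present in the directory.
   * <pre>
   *   Collection&lt;IndexCommit&gt; commits = IndexReader.listCommits(dir);
   *   IndexCommit oldest = commits.iterator().next(); // result is sorted oldest to latest
   *   IndexReader reader = IndexReader.open(oldest, true);
   * </pre>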
   * Note that if a commit is in
   * progress while this method is running, that commit
   * may or may not be returned.
   *
   * @return a sorted list of {@link IndexCommit}s, from oldest
   * to latest. */
  public static Collection<IndexCommit> listCommits(Directory dir) throws IOException {
    return DirectoryReader.listCommits(dir);
  }

  /** Expert: returns the sequential sub readers that this
   * reader is logically composed of. For example,
   * IndexSearcher uses this API to drive searching by one
   * sub reader at a time. If this reader is not composed
   * of sequential child readers, it should return null.
   * If this method returns an empty array, that means this
   * reader is a null reader (for example a MultiReader
   * that has no sub readers).
   * <p>
   * NOTE: You should not try using sub-readers returned by
   * this method to make any changes (setNorm, deleteDocument,
   * etc.). While this might succeed for one composite reader
   * (like MultiReader), it will most likely lead to index
   * corruption for other readers (like DirectoryReader obtained
   * through {@link #open}). Use the parent reader directly. */
  public IndexReader[] getSequentialSubReaders() {
    ensureOpen();
    return null;
  }

  /** Expert: returns a key that can be used to cache per-reader data, for example by {@link FieldCache}. */
  public Object getCoreCacheKey() {
    // Don't call ensureOpen since FieldCache calls this (to evict)
    // on close
    return this;
  }

  public abstract String getStringCacheKey();

  /** Expert. Warning: this returns null if the reader has
   * no deletions */
  public Object getDeletesCacheKey() {
    return this;
  }

  /** Returns the number of unique terms (across all fields)
   * in this reader.
   *
   * This method returns long, even though internally
   * Lucene cannot handle more than 2^31 unique terms, for
   * a possible future when this limitation is removed.
   *
   * @throws UnsupportedOperationException if this count
   * cannot be easily determined (e.g. Multi*Readers).
   * Instead, you should call {@link
   * #getSequentialSubReaders} and ask each sub reader for
   * its unique term count.
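   *
   * <p>Sketch (illustrative) of that per-sub-reader fallback, assuming
   * <code>reader</code> is an open reader; note that summing per-segment
   * counts may overcount terms that occur in more than one sub reader.
   * <pre>
   *   long total = 0;
   *   IndexReader[] subs = reader.getSequentialSubReaders();
   *   if (subs == null) {
   *     total = reader.getUniqueTermCount();
   *   } else {
   *     for (IndexReader sub : subs) {
   *       total += sub.getUniqueTermCount();
   *     }
   *   }
   * </pre>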
   */
  public long getUniqueTermCount() throws IOException {
    throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
  }

  // Back compat for reopen()
  @Deprecated
  private static final VirtualMethod<IndexReader> reopenMethod1 = new VirtualMethod<IndexReader>(IndexReader.class, "reopen");
  @Deprecated
  private static final VirtualMethod<IndexReader> doOpenIfChangedMethod1 = new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged");
  @Deprecated
  private final boolean hasNewReopenAPI1 = VirtualMethod.compareImplementationDistance(getClass(), doOpenIfChangedMethod1, reopenMethod1) >= 0; // it's ok for both to be overridden

  // Back compat for reopen(boolean openReadOnly)
  @Deprecated
  private static final VirtualMethod<IndexReader> reopenMethod2 = new VirtualMethod<IndexReader>(IndexReader.class, "reopen", boolean.class);
  @Deprecated
  private static final VirtualMethod<IndexReader> doOpenIfChangedMethod2 = new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged", boolean.class);
  @Deprecated
  private final boolean hasNewReopenAPI2 = VirtualMethod.compareImplementationDistance(getClass(), doOpenIfChangedMethod2, reopenMethod2) >= 0; // it's ok for both to be overridden

  // Back compat for reopen(IndexCommit commit)
  @Deprecated
  private static final VirtualMethod<IndexReader> reopenMethod3 = new VirtualMethod<IndexReader>(IndexReader.class, "reopen", IndexCommit.class);
  @Deprecated
  private static final VirtualMethod<IndexReader> doOpenIfChangedMethod3 = new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged", IndexCommit.class);
  @Deprecated
  private final boolean hasNewReopenAPI3 = VirtualMethod.compareImplementationDistance(getClass(), doOpenIfChangedMethod3, reopenMethod3) >= 0; // it's ok for both to be overridden

  // Back compat for reopen(IndexWriter writer, boolean applyDeletes)
  @Deprecated
  private static final VirtualMethod<IndexReader> reopenMethod4 = new VirtualMethod<IndexReader>(IndexReader.class, "reopen", IndexWriter.class, boolean.class);
  @Deprecated
  private static final VirtualMethod<IndexReader> doOpenIfChangedMethod4 = new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged", IndexWriter.class, boolean.class);
  @Deprecated
  private final boolean hasNewReopenAPI4 = VirtualMethod.compareImplementationDistance(getClass(), doOpenIfChangedMethod4, reopenMethod4) >= 0; // it's ok for both to be overridden

  /** For IndexReader implementations that use
   * TermInfosReader to read terms, this returns the
   * current indexDivisor as specified when the reader was
   * opened. */
  public int getTermInfosIndexDivisor() {
    throw new UnsupportedOperationException("This reader does not support this method.");
  }
}