/*
 * Hibernate Search, full-text search for your domain model
 *
 * License: GNU Lesser General Public License (LGPL), version 2.1 or later
 * See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
 */
package org.hibernate.search.backend.impl.lucene;

import java.io.IOException;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.hibernate.search.backend.AddLuceneWork;
import org.hibernate.search.backend.DeleteLuceneWork;
import org.hibernate.search.backend.FlushLuceneWork;
import org.hibernate.search.backend.IndexWorkVisitor;
import org.hibernate.search.backend.LuceneWork;
import org.hibernate.search.backend.OptimizeLuceneWork;
import org.hibernate.search.backend.PurgeAllLuceneWork;
import org.hibernate.search.backend.UpdateLuceneWork;
import org.hibernate.search.backend.impl.CommitPolicy;
import org.hibernate.search.backend.spi.DeleteByQueryLuceneWork;
import org.hibernate.search.exception.AssertionFailure;
import org.hibernate.search.indexes.spi.DirectoryBasedIndexManager;
import org.hibernate.search.indexes.spi.DirectoryBasedReaderProvider;
import org.hibernate.search.spi.WorkerBuildContext;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;

/**
 * A {@code Workspace} implementation taking advantage of NRT Lucene features.
 * {@code IndexReader} instances are obtained directly from the {@code IndexWriter}, which is not forced
 * to flush all pending changes to the {@code Directory} structure.
 * <p>
 * Lucene, in its current version, requires delete operations to be flushed, or the {@code IndexReader}s
 * retrieved via NRT will include deleted Document instances in queries; flushing delete operations
 * happens to be quite expensive, so this {@code Workspace} implementation attempts to detect when such
 * a flush operation is needed.
 * <p>
 * Applying write operations flags "indexReader requirements" with the need for either a normal flush
 * or a flush including deletes, but does not update {@code IndexReader} instances. The {@code IndexReader}s
 * are updated only if and when a fresh {@code IndexReader} is requested via {@link #openIndexReader()}.
 * This method checks whether it can return the last opened {@code IndexReader} or, if that reader is stale,
 * opens a fresh reader from the current {@code IndexWriter}.
 * <p>
 * Generation counters are used to track the need-at-least version versus the last-updated-at version:
 * shared state between index writer and reader threads is avoided to keep complexity low.
 * The method {@link #afterTransactionApplied(boolean, boolean)} might trigger multiple times, flagging
 * the index as dirty without triggering an actual {@code IndexReader} refresh, so the version counters
 * can have gaps: method {@link #refreshReaders()} will always jump to the latest seen version, as it will
 * refresh the index to satisfy both kinds of flush requirements (writes and deletes).
 * <p>
 * We keep a reference {@code IndexReader} in the {@link #currentReader} atomic reference as a fast path
 * for multiple read events when the index is not dirty.
 * <p>
 * This class implements both {@code Workspace} and {@code ReaderProvider}.
 *
 * @author Sanne Grinovero (C) 2011 Red Hat Inc.
 */
public class NRTWorkspaceImpl extends AbstractWorkspaceImpl implements DirectoryBasedReaderProvider {

	private static final Log log = LoggerFactory.make();

	private final ReentrantLock writeLock = new ReentrantLock();
	private final AtomicReference<DirectoryReader> currentReader = new AtomicReference<DirectoryReader>();
	private final CommitPolicy commitPolicy = new NRTCommitPolicy( writerHolder );

	/**
	 * Visits {@code LuceneWork} types and applies the required kind of index flushing.
	 */
	private final FlushStrategyExecutor flushStrategySelector = new FlushStrategyExecutor();

	/**
	 * Set to true when this service is shutdown (not revertible).
	 */
	private boolean shutdown = false;

	/**
	 * When true, a flush operation should make sure all write operations are flushed;
	 * otherwise a simpler flush strategy can be picked.
	 */
	private final AtomicBoolean needFlushWrites = new AtomicBoolean( true );

	/**
	 * Often, when flushing, deletes don't need to be applied. Some operation might have requested otherwise.
	 */
	private final AtomicBoolean needFlushDeletes = new AtomicBoolean( false );

	/**
	 * Internal counter used to mark different generations of IndexReaders. Monotonically incremented.
	 * Not expecting an overflow in this planet's lifetime.
	 */
	private final AtomicLong readerGeneration = new AtomicLong( 0 );

	/**
	 * When refreshing an {@code IndexReader} to achieve a fresh snapshot of a generation, we need to check this
	 * value to see if deletions need to be flushed. We try hard to not flush deletions as that is a
	 * very expensive operation.
	 * NOTE: concurrently accessed. Guarded by readerGenRequiringFlushWrites: read the other first, write it last.
	 */
	private long readerGenRequiringFlushDeletes = 0;

	/**
	 * As with {@link #readerGenRequiringFlushDeletes}, if this value is above the value of {@link #currentReaderGen}
	 * a new {@code IndexReader} should be opened as the current generation is stale.
	 */
	private volatile long readerGenRequiringFlushWrites = 0;

	/**
	 * Generation identifier of the current open {@code IndexReader} (the one stored in {@link #currentReader}).
	 */
	private volatile long currentReaderGen = 0;

	public NRTWorkspaceImpl(DirectoryBasedIndexManager indexManager, WorkerBuildContext buildContext, Properties cfg) {
		super( indexManager, buildContext, cfg );
	}

	@Override
	public void afterTransactionApplied(boolean someFailureHappened, boolean streaming) {
		commitPolicy.onChangeSetApplied( someFailureHappened, streaming );
		if ( ! streaming ) {
			setupNewReadersRequirements();
		}
	}

	/**
	 * Translates the {@code needFlushWrites} and {@code needFlushDeletes} fields into a set of requirements checked
	 * by reader threads. This is commonly invoked by a single thread (so no contention on this method
	 * is expected) but it needs to expose a consistent view of the written fields to {@link #refreshReaders()}.
	 * This is normally not invoked in parallel by multiple threads as the backend design allows a single working thread
	 * per index, but it could be invoked concurrently when streaming work is being applied (when a MassIndexer is
	 * running). Note that multiple threads invoking this in parallel might result in skipping some sequence numbers,
	 * but that's not a problem.
	 */
	private void setupNewReadersRequirements() {
		if ( needFlushDeletes.get() || needFlushWrites.get() ) {
			final long nextGenId = readerGeneration.incrementAndGet();
			if ( needFlushDeletes.get() ) {
				this.needFlushDeletes.lazySet( false ); //flushed by volatile write at end of method
				this.readerGenRequiringFlushDeletes = nextGenId; //flushed by volatile write at end of method
			}
			this.needFlushWrites.lazySet( false ); //flushed by volatile write at end of method
			this.readerGenRequiringFlushWrites = nextGenId;
		}
	}

	/**
	 * Invoked when a refresh of the current {@code IndexReader}s is detected as necessary.
	 *
	 * The implementation is blocking to maximize reuse of a single {@code IndexReader} (better for buffer usage,
	 * caching, ...) and to avoid multiple threads trying to open the same resources at the same time.
	 *
	 * @return the refreshed {@code IndexReader}
	 */
	private synchronized DirectoryReader refreshReaders() {
		//double-check for the case we no longer need to refresh
		if ( indexReaderIsFresh() ) {
			return currentReader.get();
		}
		//order of the following two reads DOES matter:
		final long readerGenRequiringFlushWrites = this.readerGenRequiringFlushWrites;
		final long readerGenRequiringFlushDeletes = this.readerGenRequiringFlushDeletes;

		final boolean flushDeletes = currentReaderGen < readerGenRequiringFlushDeletes;
		final long openingGen = Math.max( readerGenRequiringFlushDeletes, readerGenRequiringFlushWrites );

		final DirectoryReader newIndexReader = writerHolder.openNRTIndexReader( flushDeletes );
		final DirectoryReader oldReader = currentReader.getAndSet( newIndexReader );
		this.currentReaderGen = openingGen;
		try {
			if ( oldReader != null ) {
				oldReader.decRef();
			}
		}
		catch (IOException e) {
			log.unableToCloseLuceneIndexReader( e );
		}
		return newIndexReader;
	}

	private boolean indexReaderIsFresh() {
		final long currentReaderGen = this.currentReaderGen;
		//Note it reads the volatile first. These two longs are always updated in pairs.
		return currentReaderGen >= readerGenRequiringFlushWrites && currentReaderGen >= readerGenRequiringFlushDeletes;
	}

	@Override
	public DirectoryReader openIndexReader() {
		return openIndexReader( ! indexReaderIsFresh() );
	}

	/**
	 * @param needRefresh when {@code false} it won't guarantee the index reader to be affected by "latest" changes
	 * @return an {@code IndexReader} instance, either pooled or a new one
	 */
	private DirectoryReader openIndexReader(final boolean needRefresh) {
		DirectoryReader indexReader;
		if ( needRefresh ) {
			indexReader = refreshReaders();
		}
		else {
			indexReader = currentReader.get();
		}
		if ( indexReader == null ) {
			writeLock.lock();
			try {
				if ( shutdown ) {
					throw new AssertionFailure( "IndexReader requested after ReaderProvider is shutdown" );
				}
				indexReader = currentReader.get();
				if ( indexReader == null ) {
					indexReader = writerHolder.openDirectoryIndexReader();
					currentReader.set( indexReader );
				}
			}
			finally {
				writeLock.unlock();
			}
		}
		if ( indexReader.tryIncRef() ) {
			return indexReader;
		}
		else {
			//In this case we have a race: the chosen IndexReader was closed before we could increment its reference, so we need
			//to try again. Basically an optimistic lock as the race condition is very unlikely.
			//Changes should be tested at least with ReadWriteParallelismTest (in the performance tests module).
			//In case new writes happened there is no need to refresh again.
			return openIndexReader( false );
		}
	}

	@Override
	public void closeIndexReader(IndexReader reader) {
		if ( reader == null ) {
			return;
		}
		try {
			//don't use IndexReader#close as it prevents further counter decrements!
			reader.decRef();
		}
		catch (IOException e) {
			log.unableToCloseLuceneIndexReader( e );
		}
	}

	@Override
	public void initialize(DirectoryBasedIndexManager indexManager, Properties props) {
	}

	@Override
	public void stop() {
		writeLock.lock();
		try {
			final IndexReader oldReader = currentReader.getAndSet( null );
			closeIndexReader( oldReader );
			shutdown = true;
		}
		finally {
			writeLock.unlock();
		}
	}

	@Override
	public void flush() {
		//Even if this is the NRT workspace, Flush is implemented as a real Flush to make sure
		//MassIndexer output is committed to permanent storage
		commitPolicy.onFlush();
	}

	@Override
	public void notifyWorkApplied(LuceneWork work) {
		incrementModificationCounter();
		work.acceptIndexWorkVisitor( flushStrategySelector, this );
	}

	@Override
	public CommitPolicy getCommitPolicy() {
		return commitPolicy;
	}

	/**
	 * Visits each kind of {@code LuceneWork} we're processing and applies the correct flushing strategy to create
	 * consistent index readers.
	 */
	private static class FlushStrategyExecutor implements IndexWorkVisitor<NRTWorkspaceImpl, Void> {

		@Override
		public Void visitAddWork(AddLuceneWork addLuceneWork, NRTWorkspaceImpl p) {
			FlushStrategy.FLUSH_WRITES.apply( p );
			return null;
		}

		@Override
		public Void visitDeleteWork(DeleteLuceneWork deleteLuceneWork, NRTWorkspaceImpl p) {
			FlushStrategy.FLUSH_DELETIONS.apply( p );
			return null;
		}

		@Override
		public Void visitOptimizeWork(OptimizeLuceneWork optimizeLuceneWork, NRTWorkspaceImpl p) {
			FlushStrategy.NONE.apply( p );
			return null;
		}

		@Override
		public Void visitPurgeAllWork(PurgeAllLuceneWork purgeAllLuceneWork, NRTWorkspaceImpl p) {
			FlushStrategy.FLUSH_DELETIONS.apply( p );
			return null;
		}

		@Override
		public Void visitUpdateWork(UpdateLuceneWork updateLuceneWork, NRTWorkspaceImpl p) {
			FlushStrategy.FLUSH_WRITES_AND_DELETES.apply( p );
			return null;
		}

		@Override
		public Void visitFlushWork(FlushLuceneWork flushLuceneWork, NRTWorkspaceImpl p) {
			FlushStrategy.FLUSH_WRITES_AND_DELETES.apply( p );
			return null;
		}

		@Override
		public Void visitDeleteByQueryWork(DeleteByQueryLuceneWork deleteByQueryLuceneWork, NRTWorkspaceImpl p) {
			FlushStrategy.FLUSH_DELETIONS.apply( p );
			return null;
		}
	}

	private enum FlushStrategy {
		NONE {
			@Override
			void apply(final NRTWorkspaceImpl workspace) {
			}
		},
		FLUSH_DELETIONS {
			@Override
			void apply(final NRTWorkspaceImpl workspace) {
				// AtomicBoolean#lazySet is good enough as we only want to provide reads consistent with the state
				// the application is expecting. If for example no other flush is happening down the road
				// (which will eventually flush this write too) we're fine for other cores to "see"
				// IndexReader instances slightly stale.
				workspace.needFlushDeletes.lazySet( true );
			}
		},
		FLUSH_WRITES {
			@Override
			void apply(final NRTWorkspaceImpl workspace) {
				//See FLUSH_DELETIONS for why #lazySet is good enough.
				workspace.needFlushWrites.lazySet( true );
			}
		},
		FLUSH_WRITES_AND_DELETES {
			@Override
			void apply(NRTWorkspaceImpl workspace) {
				FLUSH_DELETIONS.apply( workspace );
				FLUSH_WRITES.apply( workspace );
			}
		};

		abstract void apply(NRTWorkspaceImpl workspace);
	}
}