/*
 * Copyright 2004-2009 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.compass.core.lucene.engine.transaction.async;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.compass.core.CompassException;
import org.compass.core.config.CompassConfigurable;
import org.compass.core.config.CompassSettings;
import org.compass.core.config.SearchEngineFactoryAware;
import org.compass.core.engine.SearchEngineException;
import org.compass.core.engine.SearchEngineFactory;
import org.compass.core.lucene.LuceneEnvironment;
import org.compass.core.lucene.engine.LuceneSearchEngine;
import org.compass.core.lucene.engine.LuceneSearchEngineFactory;
import org.compass.core.lucene.engine.manager.LuceneSearchEngineIndexManager;
import org.compass.core.lucene.engine.transaction.TransactionProcessor;
import org.compass.core.lucene.engine.transaction.TransactionProcessorFactory;
import org.compass.core.lucene.engine.transaction.support.CommitCallable;
import org.compass.core.lucene.engine.transaction.support.PrepareCommitCallable;
import org.compass.core.lucene.engine.transaction.support.ResourceHashing;
import org.compass.core.lucene.engine.transaction.support.job.TransactionJob;
import org.compass.core.lucene.engine.transaction.support.job.TransactionJobs;
import org.compass.core.transaction.context.TransactionalCallable;

/**
 * A transaction processor factory that creates {@link org.compass.core.lucene.engine.transaction.async.AsyncTransactionProcessor}
 * instances. Supports async execution of transactions against the index. A transaction (which includes several dirty
 * operations) is packaged into a single operation which is then applied to the index asynchronously.
 *
 * <p>Note, when several instances of Compass are running using this transaction processor, the order of transactions is
 * not maintained, which might result in transactions being applied to the index out of order.
 *
 * <p>The number of transactions that have not been processed (the backlog) is bounded and defaults to <code>10</code>.
 * If the processor is falling behind in processing transactions, commit operations will block until the backlog
 * drops below its threshold. The backlog can be set using the {@link org.compass.core.lucene.LuceneEnvironment.Transaction.Processor.Async#BACKLOG}
 * setting. Commit operations will block by default for 10 seconds in order for the backlog to drop below its threshold.
 * This timeout can be changed using the {@link org.compass.core.lucene.LuceneEnvironment.Transaction.Processor.Async#ADD_TIMEOUT}
 * setting.
 *
 * <p>Processing of transactions is done by a background thread that waits for transactions. Once there is a transaction
 * to process, it will first try to wait for additional transactions. It will block for 100 milliseconds (configurable
 * using {@link org.compass.core.lucene.LuceneEnvironment.Transaction.Processor.Async#BATCH_JOBS_TIMEOUT}), and if one
 * was added, will wait again, up to 5 times (configurable using {@link org.compass.core.lucene.LuceneEnvironment.Transaction.Processor.Async#BATCH_JOBS_SIZE}).
 * Once timeout based batching is done, the processor will try to get up to 5 more transactions in a non blocking
 * manner (configurable using {@link org.compass.core.lucene.LuceneEnvironment.Transaction.Processor.Async#NON_BLOCKING_BATCH_JOBS_SIZE}).
 *
 * <p>When all transaction jobs are accumulated, the processor starts up to 5 threads
 * (configurable using {@link org.compass.core.lucene.LuceneEnvironment.Transaction.Processor.Async#CONCURRENCY_LEVEL})
 * in order to process all the transaction jobs against the index. Hashing of the actual operations (create/update/delete)
 * can be done based either on the uid of the resource or on the sub index. By default, hashing is done based on the
 * <code>uid</code> and can be configured using {@link org.compass.core.lucene.LuceneEnvironment.Transaction.Processor.Async#HASHING}.
 *
 * <p>When the transaction processor closes, by default it will wait for all pending transactions to finish. In order to
 * disable this, the {@link org.compass.core.lucene.LuceneEnvironment.Transaction.Processor.Async#PROCESS_BEFORE_CLOSE}
 * setting should be set to <code>false</code>.
 *
 * @author kimchy
 */
public class AsyncTransactionProcessorFactory implements TransactionProcessorFactory, CompassConfigurable, SearchEngineFactoryAware {

    private static final Log logger = LogFactory.getLog(AsyncTransactionProcessorFactory.class);

    private CompassSettings settings;

    private LuceneSearchEngineFactory searchEngineFactory;

    private LuceneSearchEngineIndexManager indexManager;

    private boolean processBeforeClose;

    private int concurrencyLevel;

    private long addTimeout;

    private int batchJobsSize;

    private long batchJobTimeout;

    private int nonBlockingBatchSize;

    private ResourceHashing hashing;

    private BlockingQueue<TransactionJobs> jobsToProcess;

    private Future pollingProcessorFuture;

    private volatile PollingProcessor pollingProcessor;

    private volatile boolean closed = false;

    public void setSearchEngineFactory(SearchEngineFactory searchEngineFactory) {
        this.searchEngineFactory = (LuceneSearchEngineFactory) searchEngineFactory;
        this.indexManager = this.searchEngineFactory.getLuceneIndexManager();
    }

    public void configure(CompassSettings settings) throws CompassException {
        this.settings = settings;
        jobsToProcess = new ArrayBlockingQueue<TransactionJobs>(settings.getSettingAsInt(LuceneEnvironment.Transaction.Processor.Async.BACKLOG, 10), true);

        addTimeout = settings.getSettingAsTimeInMillis(LuceneEnvironment.Transaction.Processor.Async.ADD_TIMEOUT, 10 * 1000);
        if (logger.isDebugEnabled()) {
            logger.debug("Async Transaction Processor will wait for [" + addTimeout + "ms] if backlog is full");
        }

        batchJobsSize = settings.getSettingAsInt(LuceneEnvironment.Transaction.Processor.Async.BATCH_JOBS_SIZE, 5);
        batchJobTimeout = settings.getSettingAsTimeInMillis(LuceneEnvironment.Transaction.Processor.Async.BATCH_JOBS_TIMEOUT, 100);
        if (logger.isDebugEnabled()) {
            logger.debug("Async Transaction Processor blocking batch size is [" + batchJobsSize + "] with timeout of [" + batchJobTimeout + "ms]");
        }

        nonBlockingBatchSize = settings.getSettingAsInt(LuceneEnvironment.Transaction.Processor.Async.NON_BLOCKING_BATCH_JOBS_SIZE, 5);
        if (logger.isDebugEnabled()) {
            logger.debug("Async Transaction Processor non blocking batch size is [" + nonBlockingBatchSize + "]");
        }

        processBeforeClose = settings.getSettingAsBoolean(LuceneEnvironment.Transaction.Processor.Async.PROCESS_BEFORE_CLOSE, true);
        if (logger.isDebugEnabled()) {
            logger.debug("Async Transaction Processor process before close is set to [" + processBeforeClose + "]");
        }

        this.concurrencyLevel = settings.getSettingAsInt(LuceneEnvironment.Transaction.Processor.Async.CONCURRENCY_LEVEL, 5);
        if (logger.isDebugEnabled()) {
            logger.debug("Async Transaction Processor will use [" + concurrencyLevel + "] concurrent threads to process transactions");
        }

        hashing = ResourceHashing.fromName(settings.getSetting(LuceneEnvironment.Transaction.Processor.Async.HASHING, "uid"));
        if (logger.isDebugEnabled()) {
            logger.debug("Async Transaction Processor uses [" + hashing + "] based hashing for concurrent processing");
        }

        if (logger.isDebugEnabled()) {
            logger.debug("Starting Async polling transaction processor");
        }
    }
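
    // A minimal configuration sketch (illustrative, not part of the original class):
    // the settings consumed by configure(CompassSettings) above would be supplied
    // through Compass configuration before the factory is initialized. The values
    // shown are the defaults; times are in milliseconds.
    //
    //   settings.setSetting(LuceneEnvironment.Transaction.Processor.Async.BACKLOG, "10");
    //   settings.setSetting(LuceneEnvironment.Transaction.Processor.Async.ADD_TIMEOUT, "10000");
    //   settings.setSetting(LuceneEnvironment.Transaction.Processor.Async.BATCH_JOBS_SIZE, "5");
    //   settings.setSetting(LuceneEnvironment.Transaction.Processor.Async.BATCH_JOBS_TIMEOUT, "100");
    //   settings.setSetting(LuceneEnvironment.Transaction.Processor.Async.NON_BLOCKING_BATCH_JOBS_SIZE, "5");
    //   settings.setSetting(LuceneEnvironment.Transaction.Processor.Async.CONCURRENCY_LEVEL, "5");
    //   settings.setSetting(LuceneEnvironment.Transaction.Processor.Async.HASHING, "uid");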

    /**
     * Closes the transaction processor. Will wait for ongoing transactions if the
     * {@link org.compass.core.lucene.LuceneEnvironment.Transaction.Processor.Async#PROCESS_BEFORE_CLOSE} setting is set to
     * <code>true</code> (the default).
     */
    public synchronized void close() {
        closed = true;
        if (processBeforeClose && pollingProcessor != null) {
            // TODO don't sleep forever (can be implemented nicely with a signal)
            while (!jobsToProcess.isEmpty()) {
                try {
                    this.wait(100);
                } catch (InterruptedException e) {
                    // break out
                    break;
                }
            }
        }
        if (pollingProcessor != null) {
            try {
                pollingProcessor.close();
                pollingProcessorFuture.cancel(true);
                while (!pollingProcessor.isDone()) {
                    try {
                        this.wait(100);
                    } catch (InterruptedException e) {
                        // break out
                        break;
                    }
                }
            } finally {
                pollingProcessor = null;
                pollingProcessorFuture = null;
            }
        }
    }

    /**
     * Creates a new {@link org.compass.core.lucene.engine.transaction.async.AsyncTransactionProcessor}.
     */
    public TransactionProcessor create(LuceneSearchEngine searchEngine) {
        return new AsyncTransactionProcessor(searchEngine, this);
    }

    /**
     * The async transaction processor is not thread safe.
     */
    public boolean isThreadSafe() {
        return false;
    }

    /**
     * Removes (if still pending) the given {@link org.compass.core.lucene.engine.transaction.support.job.TransactionJobs}
     * from being processed.
     */
    public boolean remove(TransactionJobs jobs) throws SearchEngineException {
        return jobsToProcess.remove(jobs);
    }
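
    // Backpressure note (illustrative): add(..) below offers the jobs to the bounded
    // jobsToProcess queue, waiting up to addTimeout milliseconds for room. With the
    // defaults (backlog of 10, add timeout of 10 seconds), a committing transaction
    // that finds the backlog full blocks for up to 10 seconds and then fails with a
    // SearchEngineException if the processor has not caught up by then.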

    /**
     * Adds the {@link org.compass.core.lucene.engine.transaction.support.job.TransactionJobs} to be processed
     * asynchronously. If the processing thread has not been started yet, this will start it (it is started lazily,
     * so if the async transaction processor is not used, it won't incur any overhead).
     *
     * <p>The addition of {@link org.compass.core.lucene.engine.transaction.support.job.TransactionJobs} is "offered"
     * to a blocking queue, waiting until the queue is cleared in case it is full. This will cause a transaction
     * commit to block if the backlog is full. The time to wait can be controlled using
     * {@link org.compass.core.lucene.LuceneEnvironment.Transaction.Processor.Async#ADD_TIMEOUT} and defaults to
     * 10 seconds.
     */
    public void add(TransactionJobs jobs) throws SearchEngineException {
        if (pollingProcessor == null) {
            synchronized (this) {
                if (pollingProcessor == null) {
                    this.pollingProcessor = new PollingProcessor();
                    pollingProcessorFuture = searchEngineFactory.getExecutorManager().submit(pollingProcessor);
                }
            }
        }
        try {
            boolean offered = jobsToProcess.offer(jobs, addTimeout, TimeUnit.MILLISECONDS);
            if (!offered) {
                throw new SearchEngineException("Failed to add jobs [" + System.identityHashCode(jobs) + "], queue is full and nothing empties it");
            }
        } catch (InterruptedException e) {
            throw new SearchEngineException("Failed to add jobs [" + System.identityHashCode(jobs) + "], interrupted", e);
        }
    }

    private void process(TransactionJobs jobs) throws InterruptedException {
        Set<String> subIndexes = new HashSet<String>();
        List<TransactionJob>[] concurrentJobsToProcess = new List[concurrencyLevel];
        for (int i = 0; i < concurrentJobsToProcess.length; i++) {
            concurrentJobsToProcess[i] = new ArrayList<TransactionJob>();
        }

        // build the concurrent job list of lists
        addConcurrentJobsToProcess(concurrentJobsToProcess, subIndexes, jobs);

        // spin a bit to get more possible jobs, if enabled (batchJobsSize is set to a value higher than 0)
        for (int i = 0; i < batchJobsSize; i++) {
            jobs = jobsToProcess.poll(batchJobTimeout, TimeUnit.MILLISECONDS);
            if (jobs == null) {
                break;
            }
            if (logger.isTraceEnabled()) {
                logger.trace("Batching additional Jobs [" + System.identityHashCode(jobs) + "]");
            }
            addConcurrentJobsToProcess(concurrentJobsToProcess, subIndexes, jobs);
        }

        // now drain whatever is already queued, in a non blocking manner
        List<TransactionJobs> nonBlockingDrainToList = new ArrayList<TransactionJobs>();
        if (jobsToProcess.drainTo(nonBlockingDrainToList, nonBlockingBatchSize) > 0) {
            for (TransactionJobs transactionJobs : nonBlockingDrainToList) {
                if (logger.isTraceEnabled()) {
                    logger.trace("Batching additional Jobs [" + System.identityHashCode(transactionJobs) + "]");
                }
                addConcurrentJobsToProcess(concurrentJobsToProcess, subIndexes, transactionJobs);
            }
        }

        boolean failure = false;

        Map<String, IndexWriter> writers = new HashMap<String, IndexWriter>();
        // open index writers
        for (String subIndex : subIndexes) {
            try {
                IndexWriter writer = indexManager.getIndexWritersManager().openIndexWriter(settings, subIndex);
                indexManager.getIndexWritersManager().trackOpenIndexWriter(subIndex, writer);
                writers.put(subIndex, writer);
            } catch (Exception e) {
                logger.warn("Failed to open index writer for sub index [" + subIndex + "]", e);
                failure = true;
                break;
            }
        }

        if (failure) {
            closeWriters(writers);
            return;
        }

        // process all the jobs by multiple threads
        ArrayList<Callable<Object>> processCallables = new ArrayList<Callable<Object>>();
        for (List<TransactionJob> list : concurrentJobsToProcess) {
            if (list.isEmpty()) {
                // no need to create a thread for an empty list
                continue;
            }
            processCallables.add(new TransactionalCallable(indexManager.getTransactionContext(), new TransactionJobProcessor(list, writers)));
        }
        try {
            indexManager.getExecutorManager().invokeAllWithLimitBailOnException(processCallables, 1);
        } catch (Exception e) {
            logger.warn("Failed to index", e);
            failure = true;
        }

        if (failure) {
            rollbackWriters(writers);
            return;
        }

        // prepare for commit
        ArrayList<Callable<Object>> prepareCallables = new ArrayList<Callable<Object>>();
        for (Map.Entry<String, IndexWriter> entry : writers.entrySet()) {
            prepareCallables.add(new TransactionalCallable(indexManager.getTransactionContext(), new PrepareCommitCallable(entry.getKey(), entry.getValue())));
        }
        try {
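
    // Ordering note (illustrative): process(..) above partitions jobs across the
    // concurrent lists using hashing.hash(job) % concurrencyLevel (see
    // addConcurrentJobsToProcess below). For example, with the default concurrency
    // level of 5, a job whose hash is 12 lands in list 12 % 5 = 2. Since the default
    // hashing is by uid, all operations on the same resource fall into the same list
    // and are executed by the same thread, preserving their relative order within
    // the batch.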
            indexManager.getExecutorManager().invokeAllWithLimitBailOnException(prepareCallables, 1);
        } catch (Exception e) {
            logger.warn("Failed to prepare commit", e);
            failure = true;
        }

        if (failure) {
            rollbackWriters(writers);
            return;
        }

        // commit
        ArrayList<Callable<Object>> commitCallables = new ArrayList<Callable<Object>>();
        for (Map.Entry<String, IndexWriter> entry : writers.entrySet()) {
            commitCallables.add(new TransactionalCallable(indexManager.getTransactionContext(), new CommitCallable(indexManager, entry.getKey(), entry.getValue(), isClearCacheOnCommit())));
        }
        try {
            indexManager.getExecutorManager().invokeAllWithLimitBailOnException(commitCallables, 1);
        } catch (Exception e) {
            logger.warn("Failed to commit", e);
        }
    }

    /**
     * Closes all the index writers in the given writers map.
     */
    private void closeWriters(Map<String, IndexWriter> writers) {
        for (Map.Entry<String, IndexWriter> entry : writers.entrySet()) {
            try {
                entry.getValue().close();
            } catch (AlreadyClosedException e) {
                if (logger.isTraceEnabled()) {
                    logger.trace("Failed to close transaction for sub index [" + entry.getKey() + "] since it is already closed");
                }
            } catch (IOException e) {
                Directory dir = searchEngineFactory.getLuceneIndexManager().getStore().openDirectory(entry.getKey());
                try {
                    if (IndexWriter.isLocked(dir)) {
                        IndexWriter.unlock(dir);
                    }
                } catch (Exception e1) {
                    logger.warn("Failed to check for locks or unlock failed commit for sub index [" + entry.getKey() + "]", e);
                }
                logger.warn("Failed to close index writer for sub index [" + entry.getKey() + "]", e);
            } finally {
                searchEngineFactory.getLuceneIndexManager().getIndexWritersManager().trackCloseIndexWriter(entry.getKey(), entry.getValue());
            }
        }
        writers.clear();
    }
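
    // Recovery note: if closing or rolling back a writer fails with an IOException,
    // the Lucene write lock may be left behind on the directory; the handlers above
    // and below make a best effort to release it so subsequent writers on the sub
    // index are not blocked.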

    /**
     * Rolls back all the index writers in the given writers map.
     */
    private void rollbackWriters(Map<String, IndexWriter> writers) {
        for (Map.Entry<String, IndexWriter> entry : writers.entrySet()) {
            try {
                entry.getValue().rollback();
            } catch (AlreadyClosedException e) {
                if (logger.isTraceEnabled()) {
                    logger.trace("Failed to abort transaction for sub index [" + entry.getKey() + "] since it is already closed");
                }
            } catch (IOException e) {
                Directory dir = searchEngineFactory.getLuceneIndexManager().getStore().openDirectory(entry.getKey());
                try {
                    if (IndexWriter.isLocked(dir)) {
                        IndexWriter.unlock(dir);
                    }
                } catch (Exception e1) {
                    logger.warn("Failed to check for locks or unlock failed commit for sub index [" + entry.getKey() + "]", e);
                }
                // log (rather than rethrow) since rollback runs on the background processing thread
                logger.warn("Failed to rollback transaction for sub index [" + entry.getKey() + "]", e);
            } finally {
                searchEngineFactory.getLuceneIndexManager().getIndexWritersManager().trackCloseIndexWriter(entry.getKey(), entry.getValue());
            }
        }
        writers.clear();
    }

    private void addConcurrentJobsToProcess(List<TransactionJob>[] concurrentJobsToProcess, Set<String> subIndexes, TransactionJobs jobs) {
        subIndexes.addAll(jobs.getSubIndexes());
        for (TransactionJob job : jobs.getJobs()) {
            // hash each job so operations on the same uid / sub index always land in the same list
            concurrentJobsToProcess[hashing.hash(job) % concurrencyLevel].add(job);
        }
    }

    protected boolean isClearCacheOnCommit() {
        return settings.getSettingAsBoolean(LuceneEnvironment.Transaction.CLEAR_CACHE_ON_COMMIT, true);
    }

    private class PollingProcessor implements Callable<Object> {

        private volatile boolean closed = false;

        private volatile boolean done = false;

        public void close() {
            this.closed = true;
        }

        public boolean isDone() {
            return this.done;
        }

        public Object call() throws Exception {
            while (!closed) {
                try {
                    TransactionJobs jobs = jobsToProcess.poll(10, TimeUnit.SECONDS);
                    if (jobs == null) {
                        continue;
                    }
                    if (logger.isTraceEnabled()) {
                        logger.trace("Processing jobs [" + System.identityHashCode(jobs) + "]");
                    }
                    process(jobs);
                    if (logger.isTraceEnabled()) {
                        logger.trace("Processing jobs done");
                    }
                } catch (InterruptedException e) {
                    if (closed) {
                        break;
                    }
                    if (logger.isTraceEnabled()) {
                        logger.trace("Polling for transaction jobs interrupted", e);
                    }
                    // continue here, since when we get interrupted, the closed flag should be set to true
                } catch (Exception e) {
                    // unhandled exception within process, log it
                    if (logger.isWarnEnabled()) {
                        logger.warn("Exception while processing job", e);
                    }
                }
            }
            if (logger.isDebugEnabled()) {
                logger.debug("Async polling transaction processor thread stopped");
            }
            this.done = true;
            return null;
        }
    }

    private class TransactionJobProcessor implements Callable<Object> {

        private final List<TransactionJob> jobsToProcess;

        private final Map<String, IndexWriter> writers;

        private TransactionJobProcessor(List<TransactionJob> jobsToProcess, Map<String, IndexWriter> writers) {
            this.jobsToProcess = jobsToProcess;
            this.writers = writers;
        }

        public Object call() throws Exception {
            for (TransactionJob job : jobsToProcess) {
                IndexWriter writer = writers.get(job.getSubIndex());
                job.execute(writer, searchEngineFactory);
            }
            return null;
        }
    }
}
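
// Usage note (illustrative assumption, not taken from the original source): this
// factory is normally not instantiated directly; Compass selects it when the
// transaction processor is configured as "async", along the lines of:
//
//   settings.setSetting(LuceneEnvironment.Transaction.Processor.TYPE,
//           LuceneEnvironment.Transaction.Processor.Async.NAME);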