/*
 * Hibernate Search, full-text search for your domain model
 *
 * License: GNU Lesser General Public License (LGPL), version 2.1 or later
 * See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
 */
package org.hibernate.search.elasticsearch.processor.impl;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import org.hibernate.search.backend.LuceneWork;
import org.hibernate.search.backend.impl.lucene.MultiWriteDrainableLinkedList;
import org.hibernate.search.elasticsearch.client.impl.ElasticsearchClient;
import org.hibernate.search.elasticsearch.gson.impl.GsonProvider;
import org.hibernate.search.elasticsearch.logging.impl.Log;
import org.hibernate.search.elasticsearch.work.impl.BulkRequestFailedException;
import org.hibernate.search.elasticsearch.work.impl.BulkableElasticsearchWork;
import org.hibernate.search.elasticsearch.work.impl.ElasticsearchWork;
import org.hibernate.search.elasticsearch.work.impl.ElasticsearchWorkAggregator;
import org.hibernate.search.elasticsearch.work.impl.ElasticsearchWorkExecutionContext;
import org.hibernate.search.elasticsearch.work.impl.factory.ElasticsearchWorkFactory;
import org.hibernate.search.exception.ErrorHandler;
import org.hibernate.search.exception.impl.ErrorContextBuilder;
import org.hibernate.search.spi.BuildContext;
import org.hibernate.search.util.impl.Executors;
import org.hibernate.search.util.logging.impl.LoggerFactory;

/**
 * Executes single or multiple {@link ElasticsearchWork}s against the Elasticsearch server.
 * <p>
 * When processing multiple requests, bulk requests will be formed and executed as far as possible.
 * <p>
 * Requests can be processed synchronously or asynchronously. In the latter case, incoming requests are added to a queue
 * via {@link AsyncBackendRequestProcessor} from where a worker runnable will process them in bulks.
 *
 * @author Gunnar Morling
 */
public class ElasticsearchWorkProcessor implements AutoCloseable {

	private static final Log LOG = LoggerFactory.make( Log.class );

	/**
	 * Maximum number of requests sent in a single bulk. Could be made an option if needed.
	 */
	private static final int MAX_BULK_SIZE = 250;

	private final AsyncBackendRequestProcessor asyncProcessor;
	private final ErrorHandler errorHandler;
	private final ElasticsearchClient client;
	private final GsonProvider gsonProvider;
	private final ElasticsearchWorkFactory workFactory;

	// Shared context for synchronous, unsafe execution; safe because no per-request state is kept in it
	private final ElasticsearchWorkExecutionContext parallelWorkExecutionContext;

	public ElasticsearchWorkProcessor(BuildContext context,
			ElasticsearchClient client, GsonProvider gsonProvider, ElasticsearchWorkFactory workFactory) {
		asyncProcessor = new AsyncBackendRequestProcessor();
		this.errorHandler = context.getErrorHandler();
		this.client = client;
		this.gsonProvider = gsonProvider;
		this.workFactory = workFactory;
		this.parallelWorkExecutionContext = new ParallelWorkExecutionContext( client, gsonProvider );
	}

	@Override
	public void close() {
		// Drain the async queue before shutting the scheduler down, so no submitted work is lost
		awaitAsyncProcessingCompletion();
		asyncProcessor.shutdown();
	}

	/**
	 * Executes a work synchronously, potentially throwing exceptions (the error handler isn't used).
	 *
	 * @param work The work to be executed.
	 * @return The result of the given work.
	 */
	public <T> T executeSyncUnsafe(ElasticsearchWork<T> work) {
		return work.execute( parallelWorkExecutionContext );
	}

	/**
	 * Executes works synchronously, passing any thrown exception to the error handler.
	 *
	 * @param works The works to be executed.
	 */
	public void executeSyncSafe(Iterable<ElasticsearchWork<?>> works) {
		executeSafely( works );
	}

	/**
	 * Executes a work asynchronously, passing any exception to the error handler.
	 *
	 * @param work The work to be executed.
	 */
	public void executeAsync(ElasticsearchWork<?> work) {
		asyncProcessor.submitRequest( work );
	}

	/**
	 * Blocks until the queue of requests scheduled for asynchronous processing has been fully processed.
	 * N.B. if more work is added to the queue in the meantime, this might delay the wait.
	 */
	public void awaitAsyncProcessingCompletion() {
		asyncProcessor.awaitCompletion();
	}

	/**
	 * Groups the given work list into executable bulks and executes them. For each bulk, the error handler - if
	 * registered - will be invoked with the items of that bulk.
	 */
	private void executeSafely(Iterable<ElasticsearchWork<?>> requests) {
		SequentialWorkExecutionContext context = new SequentialWorkExecutionContext(
				client, gsonProvider, workFactory, this, errorHandler );

		for ( ElasticsearchWork<?> work : createRequestGroups( requests, true ) ) {
			executeSafely( work, context );
		}
		// Flush the context once at the end, not per work, so refreshes are batched
		context.flush();
	}

	/**
	 * Executes a single (possibly bulked) work, routing failures to the error handler instead of propagating them.
	 * <p>
	 * A partial bulk failure ({@link BulkRequestFailedException}) reports the successful items as completed and only
	 * the erroneous items as failed; any other runtime exception fails all Lucene works of the given work.
	 */
	private void executeSafely(ElasticsearchWork<?> work, ElasticsearchWorkExecutionContext context) {
		if ( LOG.isTraceEnabled() ) {
			LOG.tracef( "Processing %s", work );
		}

		try {
			work.execute( context );
		}
		catch (BulkRequestFailedException brfe) {
			ErrorContextBuilder builder = new ErrorContextBuilder();
			List<LuceneWork> allWorks = new ArrayList<>();

			for ( BulkableElasticsearchWork<?> successfulWork : brfe.getSuccessfulItems().keySet() ) {
				successfulWork.getLuceneWorks().forEach( (w) -> {
					allWorks.add( w );
					builder.workCompleted( w );
				});
			}

			for ( BulkableElasticsearchWork<?> failedWork : brfe.getErroneousItems() ) {
				failedWork.getLuceneWorks().forEach( (w) -> {
					allWorks.add( w );
					builder.addWorkThatFailed( w );
				});
			}

			builder.allWorkToBeDone( allWorks );
			builder.errorThatOccurred( brfe );
			errorHandler.handle( builder.createErrorContext() );
		}
		catch (RuntimeException e) {
			ErrorContextBuilder builder = new ErrorContextBuilder();
			List<LuceneWork> allWorks = new ArrayList<>();

			work.getLuceneWorks().forEach( (w) -> {
				allWorks.add( w );
				builder.addWorkThatFailed( w );
			});

			builder.allWorkToBeDone( allWorks );
			builder.errorThatOccurred( e );
			errorHandler.handle( builder.createErrorContext() );
		}
	}

	/**
	 * Organizes the given work list into {@link ProcessorWork}s to be executed.
	 */
	private List<ElasticsearchWork<?>> createRequestGroups(Iterable<ElasticsearchWork<?>> requests,
			boolean refreshInBulkAPICall) {
		ProcessorWorkGroupBuilder bulkBuilder = new ProcessorWorkGroupBuilder( refreshInBulkAPICall );
		for ( ElasticsearchWork<?> request : requests ) {
			request.aggregate( bulkBuilder );
		}
		return bulkBuilder.build();
	}

	/**
	 * Processes requests asynchronously.
	 * <p>
	 * Incoming messages are submitted to a queue. A worker runnable takes all messages in the queue at a given time and
	 * processes them as a bulk as far as possible. The worker is started upon first message arrival after the queue has
	 * been emptied and remains active until the queue is empty again.
	 *
	 * @author Gunnar Morling
	 */
	private class AsyncBackendRequestProcessor {

		private final ScheduledExecutorService scheduler;
		private final MultiWriteDrainableLinkedList<ElasticsearchWork<?>> asyncWorkQueue;

		// Guards against scheduling more than one worker at a time; reset by the worker on exit
		private final AtomicBoolean asyncWorkerWasStarted;

		// Latch of the most recently scheduled worker; volatile so awaitCompletion() sees the latest one
		private volatile CountDownLatch lastAsyncWorkLatch;

		private AsyncBackendRequestProcessor() {
			asyncWorkQueue = new MultiWriteDrainableLinkedList<>();
			scheduler = Executors.newScheduledThreadPool( "Elasticsearch AsyncBackendRequestProcessor" );
			asyncWorkerWasStarted = new AtomicBoolean( false );
		}

		public void submitRequest(ElasticsearchWork<?> request) {
			asyncWorkQueue.add( request );
			// Set up worker if needed
			if ( !asyncWorkerWasStarted.get() ) {
				synchronized ( AsyncBackendRequestProcessor.this ) {
					if ( asyncWorkerWasStarted.compareAndSet( false, true ) ) {
						try {
							RequestProcessingRunnable runnable = new RequestProcessingRunnable( this );
							// The 100ms delay allows more work to accumulate in the queue, enabling bigger bulks
							scheduler.schedule( runnable, 100, TimeUnit.MILLISECONDS );
							//only assign this when the job was successfully scheduled:
							lastAsyncWorkLatch = runnable.latch;
						}
						catch (Exception e) {
							// Make sure a failure to setup the worker doesn't leave other threads waiting indefinitely:
							asyncWorkerWasStarted.set( false );
							final CountDownLatch latch = lastAsyncWorkLatch;
							if ( latch != null ) {
								latch.countDown();
							}
							throw e;
						}
					}
				}
			}
		}

		public void awaitCompletion() {
			final CountDownLatch localLatch = lastAsyncWorkLatch;
			if ( localLatch != null ) {
				try {
					localLatch.await();
				}
				catch (InterruptedException e) {
					Thread.currentThread().interrupt();
					throw LOG.interruptedWhileWaitingForRequestCompletion( e );
				}
			}
		}

		public void shutdown() {
			scheduler.shutdown();
			try {
				scheduler.awaitTermination( Long.MAX_VALUE, TimeUnit.SECONDS );
			}
			catch (InterruptedException e) {
				// Restore the interrupt flag so callers higher up the stack can still observe the interruption
				// (consistent with awaitCompletion() above)
				Thread.currentThread().interrupt();
				LOG.interruptedWhileWaitingForIndexActivity( e );
			}
			finally {
				final CountDownLatch localLatch = lastAsyncWorkLatch;
				if ( localLatch != null ) {
					//It's possible that a task was successfully scheduled but had no chance to run,
					//so we need to release waiting threads:
					localLatch.countDown();
				}
			}
		}
	}

	/**
	 * Takes requests from the queue and processes them.
	 */
	private class RequestProcessingRunnable implements Runnable {

		private final AsyncBackendRequestProcessor asyncProcessor;

		// Released when this runnable terminates (normally or not), see run()
		private final CountDownLatch latch = new CountDownLatch( 1 );

		public RequestProcessingRunnable(AsyncBackendRequestProcessor asyncProcessor) {
			this.asyncProcessor = asyncProcessor;
		}

		@Override
		public void run() {
			try {
				processAsyncWork();
			}
			finally {
				// Always release waiters, even if processing threw
				latch.countDown();
			}
		}

		private void processAsyncWork() {
			SequentialWorkExecutionContext context = new SequentialWorkExecutionContext(
					client, gsonProvider, workFactory, ElasticsearchWorkProcessor.this, errorHandler );
			synchronized ( asyncProcessor ) {
				while ( true ) {
					// Takes a snapshot of the current queue content; null means the queue was empty
					Iterable<ElasticsearchWork<?>> works = asyncProcessor.asyncWorkQueue.drainToDetachedIterable();
					if ( works == null ) {
						// Allow other async processors to be setup already as we're on our way to termination:
						asyncProcessor.asyncWorkerWasStarted.set( false );
						// Nothing more to do, flush and terminate:
						context.flush();
						return;
					}
					for ( ElasticsearchWork<?> work : createRequestGroups( works, false ) ) {
						work.execute( context );
					}
				}
			}
		}
	}

	/**
	 * Aggregates works into bulks of at most {@link ElasticsearchWorkProcessor#MAX_BULK_SIZE}; non-bulkable works
	 * flush any bulk in progress and are appended as-is, preserving the overall execution order.
	 */
	private class ProcessorWorkGroupBuilder implements ElasticsearchWorkAggregator {

		private final boolean refreshInBulkAPICall;

		private final List<ElasticsearchWork<?>> result = new ArrayList<>();
		private final List<BulkableElasticsearchWork<?>> bulkInProgress = new ArrayList<>();

		public ProcessorWorkGroupBuilder(boolean refreshInBulkAPICall) {
			this.refreshInBulkAPICall = refreshInBulkAPICall;
		}

		@Override
		public void addBulkable(BulkableElasticsearchWork<?> work) {
			bulkInProgress.add( work );
			if ( bulkInProgress.size() >= MAX_BULK_SIZE ) {
				flushBulkInProgress();
			}
		}

		@Override
		public void addNonBulkable(ElasticsearchWork<?> work) {
			// Non-bulkable work acts as a barrier: close the current bulk first to keep ordering
			flushBulkInProgress();
			result.add( work );
		}

		private void flushBulkInProgress() {
			if ( bulkInProgress.isEmpty() ) {
				return;
			}

			if ( bulkInProgress.size() == 1 ) {
				// A single-item bulk is pointless; execute the work directly
				ElasticsearchWork<?> work = bulkInProgress.get( 0 );
				result.add( work );
			}
			else {
				result.add( workFactory.bulk( bulkInProgress ).refresh( refreshInBulkAPICall ).build() );
			}
			bulkInProgress.clear();
		}

		private List<ElasticsearchWork<?>> build() {
			flushBulkInProgress();
			return result;
		}
	}
}