package org.xbib.elasticsearch.helper.client; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.bulk.BulkAction; import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.delete.DeleteRequest; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.client.Client; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.common.util.concurrent.FutureUtils; import java.io.Closeable; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; /** * A bulk processor is a thread safe bulk processing class, allowing to easily set when to "flush" a new bulk request * (either based on number of actions, based on the size, or time), and to easily control the number of concurrent bulk * requests allowed to be executed in parallel. * In order to create a new bulk processor, use the {@link Builder}. */ public class BulkProcessor implements Closeable { /** * A listener for the execution. */ public interface Listener { /** * Callback before the bulk is executed. * @param executionId execution ID * @param request request */ void beforeBulk(long executionId, BulkRequest request); /** * Callback after a successful execution of bulk request. * @param executionId execution ID * @param request request * @param response response */ void afterBulk(long executionId, BulkRequest request, BulkResponse response); /** * Callback after a failed execution of bulk request. * * Note that in case an instance of <code>InterruptedException</code> is passed, which means that request processing has been * cancelled externally, the thread's interruption status has been restored prior to calling this method. * @param executionId execution ID * @param request request * @param failure failure */ void afterBulk(long executionId, BulkRequest request, Throwable failure); } /** * A builder used to create a build an instance of a bulk processor. */ public static class Builder { private final Client client; private final Listener listener; private String name; private int concurrentRequests = 1; private int bulkActions = 1000; private ByteSizeValue bulkSize = new ByteSizeValue(5, ByteSizeUnit.MB); private TimeValue flushInterval = null; /** * Creates a builder of bulk processor with the client to use and the listener that will be used * to be notified on the completion of bulk requests. * @param client the client * @param listener the listener */ public Builder(Client client, Listener listener) { this.client = client; this.listener = listener; } /** * Sets an optional name to identify this bulk processor. * @param name name * @return this builder */ public Builder setName(String name) { this.name = name; return this; } /** * Sets the number of concurrent requests allowed to be executed. A value of 0 means that only a single * request will be allowed to be executed. A value of 1 means 1 concurrent request is allowed to be executed * while accumulating new bulk requests. Defaults to <tt>1</tt>. * @param concurrentRequests maximum number of concurrent requests * @return this builder */ public Builder setConcurrentRequests(int concurrentRequests) { this.concurrentRequests = concurrentRequests; return this; } /** * Sets when to flush a new bulk request based on the number of actions currently added. Defaults to * <tt>1000</tt>. Can be set to <tt>-1</tt> to disable it. * @param bulkActions mbulk actions * @return this builder */ public Builder setBulkActions(int bulkActions) { this.bulkActions = bulkActions; return this; } /** * Sets when to flush a new bulk request based on the size of actions currently added. Defaults to * <tt>5mb</tt>. Can be set to <tt>-1</tt> to disable it. * @param bulkSize bulk size * @return this builder */ public Builder setBulkSize(ByteSizeValue bulkSize) { this.bulkSize = bulkSize; return this; } /** * Sets a flush interval flushing *any* bulk actions pending if the interval passes. Defaults to not set. * Note, both {@link #setBulkActions(int)} and {@link #setBulkSize(org.elasticsearch.common.unit.ByteSizeValue)} * can be set to <tt>-1</tt> with the flush interval set allowing for complete async processing of bulk actions. * @param flushInterval flush interval * @return this builder */ public Builder setFlushInterval(TimeValue flushInterval) { this.flushInterval = flushInterval; return this; } /** * Builds a new bulk processor. * @return a bulk processor */ public BulkProcessor build() { return new BulkProcessor(client, listener, name, concurrentRequests, bulkActions, bulkSize, flushInterval); } } public static Builder builder(Client client, Listener listener) { if (client == null) { throw new NullPointerException("The client you specified while building a BulkProcessor is null"); } return new Builder(client, listener); } private final int bulkActions; private final long bulkSize; private final ScheduledThreadPoolExecutor scheduler; private final ScheduledFuture scheduledFuture; private final AtomicLong executionIdGen = new AtomicLong(); private BulkRequest bulkRequest; private final BulkRequestHandler bulkRequestHandler; private volatile boolean closed = false; BulkProcessor(Client client, Listener listener, @Nullable String name, int concurrentRequests, int bulkActions, ByteSizeValue bulkSize, @Nullable TimeValue flushInterval) { this.bulkActions = bulkActions; this.bulkSize = bulkSize.bytes(); this.bulkRequest = new BulkRequest(); this.bulkRequestHandler = concurrentRequests == 0 ? new SyncBulkRequestHandler(client, listener) : new AsyncBulkRequestHandler(client, listener, concurrentRequests); if (flushInterval != null) { this.scheduler = (ScheduledThreadPoolExecutor) Executors.newScheduledThreadPool(1, EsExecutors.daemonThreadFactory(client.settings(), (name != null ? "[" + name + "]" : "") + "bulk_processor")); this.scheduler.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); this.scheduler.setContinueExistingPeriodicTasksAfterShutdownPolicy(false); this.scheduledFuture = this.scheduler.scheduleWithFixedDelay(new Flush(), flushInterval.millis(), flushInterval.millis(), TimeUnit.MILLISECONDS); } else { this.scheduler = null; this.scheduledFuture = null; } } /** * Closes the processor. If flushing by time is enabled, then it's shutdown. Any remaining bulk actions are flushed. */ @Override public void close() { try { awaitClose(0, TimeUnit.NANOSECONDS); } catch(InterruptedException exc) { Thread.currentThread().interrupt(); } } /** * Closes the processor. If flushing by time is enabled, then it's shutdown. Any remaining bulk actions are flushed. * * If concurrent requests are not enabled, returns {@code true} immediately. * If concurrent requests are enabled, waits for up to the specified timeout for all bulk requests to complete then returns {@code true}, * If the specified waiting time elapses before all bulk requests complete, {@code false} is returned. * * @param timeout The maximum time to wait for the bulk requests to complete * @param unit The time unit of the {@code timeout} argument * @return {@code true} if all bulk requests completed and {@code false} if the waiting time elapsed before all the bulk requests completed * @throws InterruptedException If the current thread is interrupted */ public synchronized boolean awaitClose(long timeout, TimeUnit unit) throws InterruptedException { if (closed) { return true; } closed = true; if (this.scheduledFuture != null) { FutureUtils.cancel(this.scheduledFuture); this.scheduler.shutdown(); } if (bulkRequest.numberOfActions() > 0) { execute(); } return this.bulkRequestHandler.awaitClose(timeout, unit); } /** * Adds an {@link IndexRequest} to the list of actions to execute. Follows the same behavior of {@link IndexRequest} * (for example, if no id is provided, one will be generated, or usage of the create flag). * @param request request * @return his bulk processor */ public BulkProcessor add(IndexRequest request) { return add((ActionRequest) request); } /** * Adds an {@link DeleteRequest} to the list of actions to execute. * @param request request * @return his bulk processor */ public BulkProcessor add(DeleteRequest request) { return add((ActionRequest) request); } /** * Adds either a delete or an index request. * @param request request * @return his bulk processor */ public BulkProcessor add(ActionRequest request) { return add(request, null); } /** * Adds either a delete or an index request with a payload. * @param request request * @param payload payload * @return his bulk processor */ public BulkProcessor add(ActionRequest request, @Nullable Object payload) { internalAdd(request, payload); return this; } protected void ensureOpen() { if (closed) { throw new IllegalStateException("bulk process already closed"); } } private synchronized void internalAdd(ActionRequest request, @Nullable Object payload) { ensureOpen(); bulkRequest.add(request, payload); executeIfNeeded(); } public BulkProcessor add(BytesReference data, @Nullable String defaultIndex, @Nullable String defaultType) throws Exception { return add(data, defaultIndex, defaultType, null); } public synchronized BulkProcessor add(BytesReference data, @Nullable String defaultIndex, @Nullable String defaultType, @Nullable Object payload) throws Exception { bulkRequest.add(data, defaultIndex, defaultType, null, null, payload, true); executeIfNeeded(); return this; } private void executeIfNeeded() { ensureOpen(); if (!isOverTheLimit()) { return; } execute(); } private void execute() { final BulkRequest bulkRequest = this.bulkRequest; final long executionId = executionIdGen.incrementAndGet(); this.bulkRequest = new BulkRequest(); this.bulkRequestHandler.execute(bulkRequest, executionId); } private boolean isOverTheLimit() { return bulkActions != -1 && bulkRequest.numberOfActions() >= bulkActions || bulkSize != -1 && bulkRequest.estimatedSizeInBytes() >= bulkSize; } /** * Flush pending delete or index requests. */ public synchronized void flush() { ensureOpen(); if (bulkRequest.numberOfActions() > 0) { execute(); } } class Flush implements Runnable { @Override public void run() { synchronized (BulkProcessor.this) { if (closed) { return; } if (bulkRequest.numberOfActions() == 0) { return; } execute(); } } } /** * Abstracts the low-level details of bulk request handling */ abstract class BulkRequestHandler { public abstract void execute(BulkRequest bulkRequest, long executionId); public abstract boolean awaitClose(long timeout, TimeUnit unit) throws InterruptedException; } class SyncBulkRequestHandler extends BulkRequestHandler { private final Client client; private final BulkProcessor.Listener listener; public SyncBulkRequestHandler(Client client, BulkProcessor.Listener listener) { this.client = client; this.listener = listener; } public void execute(BulkRequest bulkRequest, long executionId) { boolean afterCalled = false; try { listener.beforeBulk(executionId, bulkRequest); BulkResponse bulkResponse = client.execute(BulkAction.INSTANCE, bulkRequest).actionGet(); afterCalled = true; listener.afterBulk(executionId, bulkRequest, bulkResponse); } catch (Throwable t) { if (!afterCalled) { listener.afterBulk(executionId, bulkRequest, t); } } } public boolean awaitClose(long timeout, TimeUnit unit) throws InterruptedException { return true; } } class AsyncBulkRequestHandler extends BulkRequestHandler { private final Client client; private final BulkProcessor.Listener listener; private final Semaphore semaphore; private final int concurrentRequests; private AsyncBulkRequestHandler(Client client, BulkProcessor.Listener listener, int concurrentRequests) { this.client = client; this.listener = listener; this.concurrentRequests = concurrentRequests; this.semaphore = new Semaphore(concurrentRequests); } @Override public void execute(final BulkRequest bulkRequest, final long executionId) { boolean bulkRequestSetupSuccessful = false; boolean acquired = false; try { listener.beforeBulk(executionId, bulkRequest); semaphore.acquire(); acquired = true; client.execute(BulkAction.INSTANCE, bulkRequest, new ActionListener<BulkResponse>() { @Override public void onResponse(BulkResponse response) { try { listener.afterBulk(executionId, bulkRequest, response); } finally { semaphore.release(); } } @Override public void onFailure(Throwable e) { try { listener.afterBulk(executionId, bulkRequest, e); } finally { semaphore.release(); } } }); bulkRequestSetupSuccessful = true; } catch (InterruptedException e) { Thread.currentThread().interrupt(); listener.afterBulk(executionId, bulkRequest, e); } catch (Throwable t) { listener.afterBulk(executionId, bulkRequest, t); } finally { if (!bulkRequestSetupSuccessful && acquired) { // if we fail on client.bulk() release the semaphore semaphore.release(); } } } @Override public boolean awaitClose(long timeout, TimeUnit unit) throws InterruptedException { if (semaphore.tryAcquire(this.concurrentRequests, timeout, unit)) { semaphore.release(this.concurrentRequests); return true; } return false; } } }