/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.action.bulk;

import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.test.ESIntegTestCase;
import org.hamcrest.Matcher;

import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.lessThanOrEqualTo;

@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, numDataNodes = 2)
public class BulkProcessorRetryIT extends ESIntegTestCase {
    private static final String INDEX_NAME = "test";
    private static final String TYPE_NAME = "type";

    @Override
    protected Settings nodeSettings(int nodeOrdinal) {
        // Have very low pool and queue sizes to overwhelm internal pools easily
        return Settings.builder()
            .put(super.nodeSettings(nodeOrdinal))
            // don't mess with this one! It's quite sensitive to a low queue size
            // (see also ThreadedActionListener which is happily spawning threads even when we already got rejected)
            //.put("thread_pool.listener.queue_size", 1)
            .put("thread_pool.get.queue_size", 1)
            // default is 50
            .put("thread_pool.bulk.queue_size", 30)
            .build();
    }

    public void testBulkRejectionLoadWithoutBackoff() throws Throwable {
        boolean rejectedExecutionExpected = true;
        executeBulkRejectionLoad(BackoffPolicy.noBackoff(), rejectedExecutionExpected);
    }

    public void testBulkRejectionLoadWithBackoff() throws Throwable {
        boolean rejectedExecutionExpected = false;
        executeBulkRejectionLoad(BackoffPolicy.exponentialBackoff(), rejectedExecutionExpected);
    }

    private void executeBulkRejectionLoad(BackoffPolicy backoffPolicy, boolean rejectedExecutionExpected) throws Throwable {
        final CorrelatingBackoffPolicy internalPolicy = new CorrelatingBackoffPolicy(backoffPolicy);
        int numberOfAsyncOps = randomIntBetween(600, 700);
        final CountDownLatch latch = new CountDownLatch(numberOfAsyncOps);
        final Set<Object> responses = Collections.newSetFromMap(new ConcurrentHashMap<>());

        assertAcked(prepareCreate(INDEX_NAME));
        ensureGreen();

        BulkProcessor bulkProcessor = BulkProcessor.builder(client(), new BulkProcessor.Listener() {
            @Override
            public void beforeBulk(long executionId, BulkRequest request) {
                // no op
            }

            @Override
            public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
                internalPolicy.logResponse(response);
                responses.add(response);
                latch.countDown();
            }

            @Override
            public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
                responses.add(failure);
                latch.countDown();
            }
        }).setBulkActions(1)
            // zero means that we're in the sync case, more means that we're in the async case
            .setConcurrentRequests(randomIntBetween(0, 100))
            .setBackoffPolicy(internalPolicy)
            .build();

        indexDocs(bulkProcessor, numberOfAsyncOps);
        latch.await(10, TimeUnit.SECONDS);
        bulkProcessor.close();

        assertThat(responses.size(), equalTo(numberOfAsyncOps));

        // validate all responses
        for (Object response : responses) {
            if (response instanceof BulkResponse) {
                BulkResponse bulkResponse = (BulkResponse) response;
                for (BulkItemResponse bulkItemResponse : bulkResponse.getItems()) {
                    if (bulkItemResponse.isFailed()) {
                        BulkItemResponse.Failure failure = bulkItemResponse.getFailure();
                        Throwable rootCause = ExceptionsHelper.unwrapCause(failure.getCause());
                        if (rootCause instanceof EsRejectedExecutionException) {
                            if (rejectedExecutionExpected == false) {
                                Iterator<TimeValue> backoffState = internalPolicy.backoffStateFor(bulkResponse);
                                assertNotNull("backoffState is null (indicates a bulk request got rejected without retry)", backoffState);
                                if (backoffState.hasNext()) {
                                    // we're not expecting that we overwhelmed it even once when we maxed out the number of retries
                                    throw new AssertionError("Got rejected although backoff policy would allow more retries", rootCause);
                                } else {
                                    logger.debug("We maxed out the number of bulk retries and got rejected (this is ok).");
                                }
                            }
                        } else {
                            throw new AssertionError("Unexpected failure", rootCause);
                        }
                    }
                }
            } else {
                Throwable t = (Throwable) response;
                // we're not expecting any other errors
                throw new AssertionError("Unexpected failure", t);
            }
        }

        client().admin().indices().refresh(new RefreshRequest()).get();

        // validate we did not create any duplicates due to retries
        Matcher<Long> searchResultCount;
        // it is ok if we lost some index operations to rejected executions
        // (which is possible even when backing off, although less likely)
        searchResultCount = lessThanOrEqualTo((long) numberOfAsyncOps);

        SearchResponse results = client()
            .prepareSearch(INDEX_NAME)
            .setTypes(TYPE_NAME)
            .setQuery(QueryBuilders.matchAllQuery())
            .setSize(0)
            .get();
        assertThat(results.getHits().getTotalHits(), searchResultCount);
    }

    private static void indexDocs(BulkProcessor processor, int numDocs) {
        for (int i = 1; i <= numDocs; i++) {
            processor.add(client()
                .prepareIndex()
                .setIndex(INDEX_NAME)
                .setType(TYPE_NAME)
                .setId(Integer.toString(i))
                .setSource("field", randomRealisticUnicodeOfLengthBetween(1, 30))
                .request());
        }
    }

    /**
     * Internal helper class to correlate backoff states with bulk responses. This is needed to check whether we maxed out the number
     * of retries but still got rejected (which is perfectly fine and can also happen from time to time under heavy load).
     *
     * This implementation relies on an implementation detail in Retry, namely that the bulk listener is notified on the same thread
     * as the last call to the backoff policy's iterator. The advantage is that this is non-invasive to the rest of the production code.
     */
    private static class CorrelatingBackoffPolicy extends BackoffPolicy {
        private final Map<BulkResponse, Iterator<TimeValue>> correlations = new ConcurrentHashMap<>();
        // this is intentionally *not* static final. We will only ever have one instance of this class per test case and want the
        // thread local to be eligible for garbage collection right after the test to avoid leaks.
        private final ThreadLocal<Iterator<TimeValue>> iterators = new ThreadLocal<>();

        private final BackoffPolicy delegate;

        private CorrelatingBackoffPolicy(BackoffPolicy delegate) {
            this.delegate = delegate;
        }

        public Iterator<TimeValue> backoffStateFor(BulkResponse response) {
            return correlations.get(response);
        }

        // Assumption: This method is called from the same thread as the last call to the internal iterator's #hasNext() / #next()
        // see also Retry.AbstractRetryHandler#onResponse().
        public void logResponse(BulkResponse response) {
            Iterator<TimeValue> iterator = iterators.get();
            // did we ever retry?
            if (iterator != null) {
                // we should correlate any iterator only once
                iterators.remove();
                correlations.put(response, iterator);
            }
        }

        @Override
        public Iterator<TimeValue> iterator() {
            return new CorrelatingIterator(iterators, delegate.iterator());
        }

        private static class CorrelatingIterator implements Iterator<TimeValue> {
            private final Iterator<TimeValue> delegate;
            private final ThreadLocal<Iterator<TimeValue>> iterators;

            private CorrelatingIterator(ThreadLocal<Iterator<TimeValue>> iterators, Iterator<TimeValue> delegate) {
                this.iterators = iterators;
                this.delegate = delegate;
            }

            @Override
            public boolean hasNext() {
                // update on every invocation as we might get rescheduled on a different thread. Unfortunately, there is a chance that
                // we pollute the thread local map with stale values. Due to the implementation of Retry and the life cycle of the
                // enclosing class CorrelatingBackoffPolicy this should not pose a major problem though.
                iterators.set(this);
                return delegate.hasNext();
            }

            @Override
            public TimeValue next() {
                // update on every invocation
                iterators.set(this);
                return delegate.next();
            }
        }
    }
}