/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.connectors.elasticsearch;

import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.core.testutils.CheckedThread;
import org.apache.flink.core.testutils.MultiShotLatch;
import org.apache.flink.streaming.api.operators.StreamSink;
import org.apache.flink.streaming.connectors.elasticsearch.util.NoOpFailureHandler;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness;

import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionResponse;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
 * Suite of tests for {@link ElasticsearchSinkBase}.
 */
public class ElasticsearchSinkBaseTest {
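
	// Common pattern for the tests below: a DummyElasticsearchSink replaces the real
	// BulkProcessor with a Mockito mock whose flushes are gated by a MultiShotLatch,
	// so each test can deterministically control when bulk requests execute and
	// which of their items fail.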

	/** Tests that any item failure in the listener callbacks is rethrown on an immediately following invoke call. */
	@Test
	public void testItemFailureRethrownOnInvoke() throws Throwable {
		final DummyElasticsearchSink<String> sink = new DummyElasticsearchSink<>(
			new HashMap<String, String>(), new SimpleSinkFunction<String>(), new NoOpFailureHandler());

		final OneInputStreamOperatorTestHarness<String, Object> testHarness =
			new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink));

		testHarness.open();

		// setup the next bulk request, and its mock item failures
		sink.setMockItemFailuresListForNextBulkItemResponses(Collections.singletonList(new Exception("artificial failure for record")));
		testHarness.processElement(new StreamRecord<>("msg"));
		verify(sink.getMockBulkProcessor(), times(1)).add(any(ActionRequest.class));

		// manually execute the next bulk request
		sink.manualBulkRequestWithAllPendingRequests();

		try {
			testHarness.processElement(new StreamRecord<>("next msg"));
		} catch (Exception e) {
			// the invoke should have failed with the failure
			Assert.assertTrue(e.getCause().getMessage().contains("artificial failure for record"));

			// test succeeded
			return;
		}

		Assert.fail();
	}

	/** Tests that any item failure in the listener callbacks is rethrown on an immediately following checkpoint. */
	@Test
	public void testItemFailureRethrownOnCheckpoint() throws Throwable {
		final DummyElasticsearchSink<String> sink = new DummyElasticsearchSink<>(
			new HashMap<String, String>(), new SimpleSinkFunction<String>(), new NoOpFailureHandler());

		final OneInputStreamOperatorTestHarness<String, Object> testHarness =
			new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink));

		testHarness.open();

		// setup the next bulk request, and its mock item failures
		sink.setMockItemFailuresListForNextBulkItemResponses(Collections.singletonList(new Exception("artificial failure for record")));
		testHarness.processElement(new StreamRecord<>("msg"));
		verify(sink.getMockBulkProcessor(), times(1)).add(any(ActionRequest.class));

		// manually execute the next bulk request
		sink.manualBulkRequestWithAllPendingRequests();

		try {
			testHarness.snapshot(1L, 1000L);
		} catch (Exception e) {
			// the snapshot should have failed with the failure
			Assert.assertTrue(e.getCause().getCause().getMessage().contains("artificial failure for record"));

			// test succeeded
			return;
		}

		Assert.fail();
	}
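
	// Note: on invoke() the rethrown failure surfaces as e.getCause(), while on snapshot()
	// it appears to be wrapped once more, hence the e.getCause().getCause() in the
	// checkpoint tests above and below.

	// The flush-on-checkpoint tests below run snapshot() on a separate CheckedThread, which
	// blocks on the sink's flush latch; busy-waiting for Thread.State.WAITING detects that
	// the snapshot thread has started flushing and is parked on the latch.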

	/**
	 * Tests that any item failure in the listener callbacks due to flushing on an immediately following checkpoint
	 * is rethrown; we set a timeout because the test will not finish if the logic is broken.
	 */
	@Test(timeout = 5000)
	public void testItemFailureRethrownOnCheckpointAfterFlush() throws Throwable {
		final DummyElasticsearchSink<String> sink = new DummyElasticsearchSink<>(
			new HashMap<String, String>(), new SimpleSinkFunction<String>(), new NoOpFailureHandler());

		final OneInputStreamOperatorTestHarness<String, Object> testHarness =
			new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink));

		testHarness.open();

		// setup the next bulk request, and its mock item failures
		List<Exception> mockResponsesList = new ArrayList<>(2);
		mockResponsesList.add(null); // the first request in a bulk will succeed
		mockResponsesList.add(new Exception("artificial failure for record")); // the second request in a bulk will fail
		sink.setMockItemFailuresListForNextBulkItemResponses(mockResponsesList);

		testHarness.processElement(new StreamRecord<>("msg-1"));
		verify(sink.getMockBulkProcessor(), times(1)).add(any(ActionRequest.class));

		// manually execute the next bulk request (1 request only, thus should succeed)
		sink.manualBulkRequestWithAllPendingRequests();

		// setup the requests to be flushed in the snapshot
		testHarness.processElement(new StreamRecord<>("msg-2"));
		testHarness.processElement(new StreamRecord<>("msg-3"));
		verify(sink.getMockBulkProcessor(), times(3)).add(any(ActionRequest.class));

		CheckedThread snapshotThread = new CheckedThread() {
			@Override
			public void go() throws Exception {
				testHarness.snapshot(1L, 1000L);
			}
		};
		snapshotThread.start();

		// the snapshot should eventually be blocked before snapshot triggers flushing
		while (snapshotThread.getState() != Thread.State.WAITING) {
			Thread.sleep(10);
		}

		// let the snapshot-triggered flush continue (2 records in the bulk, so the 2nd one should fail)
		sink.continueFlush();

		try {
			snapshotThread.sync();
		} catch (Exception e) {
			// the snapshot should have failed with the failure from the 2nd request
			Assert.assertTrue(e.getCause().getCause().getMessage().contains("artificial failure for record"));

			// test succeeded
			return;
		}

		Assert.fail();
	}

	/** Tests that any bulk failure in the listener callbacks is rethrown on an immediately following invoke call. */
	@Test
	public void testBulkFailureRethrownOnInvoke() throws Throwable {
		final DummyElasticsearchSink<String> sink = new DummyElasticsearchSink<>(
			new HashMap<String, String>(), new SimpleSinkFunction<String>(), new NoOpFailureHandler());

		final OneInputStreamOperatorTestHarness<String, Object> testHarness =
			new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink));

		testHarness.open();

		// setup the next bulk request, and let the whole bulk request fail
		sink.setFailNextBulkRequestCompletely(new Exception("artificial failure for bulk request"));
		testHarness.processElement(new StreamRecord<>("msg"));
		verify(sink.getMockBulkProcessor(), times(1)).add(any(ActionRequest.class));

		// manually execute the next bulk request
		sink.manualBulkRequestWithAllPendingRequests();

		try {
			testHarness.processElement(new StreamRecord<>("next msg"));
		} catch (Exception e) {
			// the invoke should have failed with the bulk request failure
			Assert.assertTrue(e.getCause().getMessage().contains("artificial failure for bulk request"));

			// test succeeded
			return;
		}

		Assert.fail();
	}

	/** Tests that any bulk failure in the listener callbacks is rethrown on an immediately following checkpoint. */
	@Test
	public void testBulkFailureRethrownOnCheckpoint() throws Throwable {
		final DummyElasticsearchSink<String> sink = new DummyElasticsearchSink<>(
			new HashMap<String, String>(), new SimpleSinkFunction<String>(), new NoOpFailureHandler());

		final OneInputStreamOperatorTestHarness<String, Object> testHarness =
			new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink));

		testHarness.open();

		// setup the next bulk request, and let the whole bulk request fail
		sink.setFailNextBulkRequestCompletely(new Exception("artificial failure for bulk request"));
		testHarness.processElement(new StreamRecord<>("msg"));
		verify(sink.getMockBulkProcessor(), times(1)).add(any(ActionRequest.class));

		// manually execute the next bulk request
		sink.manualBulkRequestWithAllPendingRequests();

		try {
			testHarness.snapshot(1L, 1000L);
		} catch (Exception e) {
			// the snapshot should have failed with the bulk request failure
			Assert.assertTrue(e.getCause().getCause().getMessage().contains("artificial failure for bulk request"));

			// test succeeded
			return;
		}

		Assert.fail();
	}

	/**
	 * Tests that any bulk failure in the listener callbacks due to flushing on an immediately following checkpoint
	 * is rethrown; we set a timeout because the test will not finish if the logic is broken.
	 */
	@Test(timeout = 5000)
	public void testBulkFailureRethrownOnCheckpointAfterFlush() throws Throwable {
		final DummyElasticsearchSink<String> sink = new DummyElasticsearchSink<>(
			new HashMap<String, String>(), new SimpleSinkFunction<String>(), new NoOpFailureHandler());

		final OneInputStreamOperatorTestHarness<String, Object> testHarness =
			new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink));

		testHarness.open();

		// setup the next bulk request, and let the bulk request succeed
		sink.setMockItemFailuresListForNextBulkItemResponses(Collections.singletonList((Exception) null));
		testHarness.processElement(new StreamRecord<>("msg-1"));
		verify(sink.getMockBulkProcessor(), times(1)).add(any(ActionRequest.class));

		// manually execute the next bulk request
		sink.manualBulkRequestWithAllPendingRequests();

		// setup the requests to be flushed in the snapshot
		testHarness.processElement(new StreamRecord<>("msg-2"));
		testHarness.processElement(new StreamRecord<>("msg-3"));
		verify(sink.getMockBulkProcessor(), times(3)).add(any(ActionRequest.class));

		CheckedThread snapshotThread = new CheckedThread() {
			@Override
			public void go() throws Exception {
				testHarness.snapshot(1L, 1000L);
			}
		};
		snapshotThread.start();

		// the snapshot should eventually be blocked before snapshot triggers flushing
		while (snapshotThread.getState() != Thread.State.WAITING) {
			Thread.sleep(10);
		}

		// for the snapshot-triggered flush, we let the bulk request fail completely
		sink.setFailNextBulkRequestCompletely(new Exception("artificial failure for bulk request"));

		// let the snapshot-triggered flush continue (bulk request should fail completely)
		sink.continueFlush();

		try {
			snapshotThread.sync();
		} catch (Exception e) {
			// the snapshot should have failed with the bulk request failure
			Assert.assertTrue(e.getCause().getCause().getMessage().contains("artificial failure for bulk request"));

			// test succeeded
			return;
		}

		Assert.fail();
	}
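
	// Note for the at-least-once test below: a failed item is re-added by the failure handler,
	// so the snapshot must go through two flush cycles before it can complete; the test
	// observes the second cycle by waiting for the snapshot thread to block again.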

	/**
	 * Tests that the sink correctly waits for pending requests (including re-added requests) on checkpoints;
	 * we set a timeout because the test will not finish if the logic is broken.
	 */
	@Test(timeout = 5000)
	public void testAtLeastOnceSink() throws Throwable {
		final DummyElasticsearchSink<String> sink = new DummyElasticsearchSink<>(
			new HashMap<String, String>(),
			new SimpleSinkFunction<String>(),
			new DummyRetryFailureHandler()); // use a failure handler that simply re-adds requests

		final OneInputStreamOperatorTestHarness<String, Object> testHarness =
			new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink));

		testHarness.open();

		// setup the next bulk request, and its mock item failures;
		// it contains 1 request, which will fail and be re-added to the next bulk request
		sink.setMockItemFailuresListForNextBulkItemResponses(Collections.singletonList(new Exception("artificial failure for record")));
		testHarness.processElement(new StreamRecord<>("msg"));
		verify(sink.getMockBulkProcessor(), times(1)).add(any(ActionRequest.class));

		CheckedThread snapshotThread = new CheckedThread() {
			@Override
			public void go() throws Exception {
				testHarness.snapshot(1L, 1000L);
			}
		};
		snapshotThread.start();

		// the snapshot should eventually be blocked before snapshot triggers flushing
		while (snapshotThread.getState() != Thread.State.WAITING) {
			Thread.sleep(10);
		}

		sink.continueFlush();

		// since the previous flush should have resulted in a request re-add from the failure handler,
		// we should have flushed again, and eventually be blocked before snapshot triggers the 2nd flush
		while (snapshotThread.getState() != Thread.State.WAITING) {
			Thread.sleep(10);
		}

		// the current number of pending requests should be 1 due to the re-add
		Assert.assertEquals(1, sink.getNumPendingRequests());

		// this time, let the bulk request succeed, so no more requests are re-added
		sink.setMockItemFailuresListForNextBulkItemResponses(Collections.singletonList((Exception) null));

		sink.continueFlush();

		// the snapshot should finish with no exceptions
		snapshotThread.sync();

		testHarness.close();
	}

	/**
	 * This test is meant to assure that testAtLeastOnceSink is valid by testing that if flushing is disabled,
	 * the snapshot method does indeed finish without waiting for pending requests;
	 * we set a timeout because the test will not finish if the logic is broken.
	 */
	@Test(timeout = 5000)
	public void testDoesNotWaitForPendingRequestsIfFlushingDisabled() throws Exception {
		final DummyElasticsearchSink<String> sink = new DummyElasticsearchSink<>(
			new HashMap<String, String>(), new SimpleSinkFunction<String>(), new DummyRetryFailureHandler());
		sink.disableFlushOnCheckpoint(); // disable flushing

		final OneInputStreamOperatorTestHarness<String, Object> testHarness =
			new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink));

		testHarness.open();

		// setup the next bulk request, and its mock item failures
		// (irrelevant here, since the request is never flushed)
		sink.setMockItemFailuresListForNextBulkItemResponses(Collections.singletonList(new Exception("artificial failure for record")));
		testHarness.processElement(new StreamRecord<>("msg-1"));
		verify(sink.getMockBulkProcessor(), times(1)).add(any(ActionRequest.class));

		// the snapshot should not block even though we haven't flushed the bulk request
		testHarness.snapshot(1L, 1000L);

		testHarness.close();
	}

	private static class DummyElasticsearchSink<T> extends ElasticsearchSinkBase<T> {

		private static final long serialVersionUID = 5051907841570096991L;

		private transient BulkProcessor mockBulkProcessor;
		private transient BulkRequest nextBulkRequest = new BulkRequest();
		private transient MultiShotLatch flushLatch = new MultiShotLatch();

		private List<? extends Throwable> mockItemFailuresList;
		private Throwable nextBulkFailure;

		public DummyElasticsearchSink(
				Map<String, String> userConfig,
				ElasticsearchSinkFunction<T> sinkFunction,
				ActionRequestFailureHandler failureHandler) {
			super(new DummyElasticsearchApiCallBridge(), userConfig, sinkFunction, failureHandler);
		}

		/**
		 * This method is used to mimic a scheduled bulk request; we need to do this
		 * manually because we are mocking the BulkProcessor.
		 */
		public void manualBulkRequestWithAllPendingRequests() {
			flushLatch.trigger(); // let the flush go through
			mockBulkProcessor.flush();
		}

		/**
		 * On non-manual flushes, i.e. when flush is called in the snapshot method implementation,
		 * usages need to explicitly call this to allow the flush to continue. This is useful
		 * to make sure that specific requests get added to the next bulk request for flushing.
		 */
		public void continueFlush() {
			flushLatch.trigger();
		}

		/**
		 * Sets the list of mock failures to use for the next bulk of item responses. A {@code null}
		 * element means that the response for the corresponding item is successful; otherwise it failed.
		 *
		 * <p>The list is used in corresponding order to the requests in the bulk, i.e. the first
		 * request uses the response at index 0, the second request uses the response at index 1, etc.
		 */
		public void setMockItemFailuresListForNextBulkItemResponses(List<? extends Throwable> mockItemFailuresList) {
			this.mockItemFailuresList = mockItemFailuresList;
		}
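
		// The two configuration methods here map to the two BulkProcessor.Listener callbacks
		// exercised in buildBulkProcessor(): per-item failures feed the
		// afterBulk(long, BulkRequest, BulkResponse) callback, while a complete bulk failure
		// feeds the afterBulk(long, BulkRequest, Throwable) callback.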

		/**
		 * Lets the next bulk request fail completely with the provided throwable.
		 * If this is set, the failure list provided with setMockItemFailuresListForNextBulkItemResponses is not respected.
		 */
		public void setFailNextBulkRequestCompletely(Throwable failure) {
			this.nextBulkFailure = failure;
		}

		public BulkProcessor getMockBulkProcessor() {
			return mockBulkProcessor;
		}

		/**
		 * Overrides the bulk processor build process to provide a mock implementation,
		 * but reuses the listener implementation in our mock to test that the listener logic
		 * works correctly with the request flushing logic.
		 */
		@Override
		protected BulkProcessor buildBulkProcessor(final BulkProcessor.Listener listener) {
			this.mockBulkProcessor = mock(BulkProcessor.class);

			when(mockBulkProcessor.add(any(ActionRequest.class))).thenAnswer(new Answer<Object>() {
				@Override
				public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
					// intercept the request and add it to our mock bulk request
					nextBulkRequest.add(invocationOnMock.getArgumentAt(0, ActionRequest.class));
					return null;
				}
			});

			doAnswer(new Answer() {
				@Override
				public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
					while (nextBulkRequest.numberOfActions() > 0) {
						// wait until we are allowed to continue with the flushing
						flushLatch.await();

						// create a copy of the accumulated mock requests, so that
						// re-added requests from the failure handler are included in the next bulk
						BulkRequest currentBulkRequest = nextBulkRequest;
						nextBulkRequest = new BulkRequest();

						listener.beforeBulk(123L, currentBulkRequest);

						if (nextBulkFailure == null) {
							BulkItemResponse[] mockResponses = new BulkItemResponse[currentBulkRequest.requests().size()];
							for (int i = 0; i < currentBulkRequest.requests().size(); i++) {
								Throwable mockItemFailure = mockItemFailuresList.get(i);

								if (mockItemFailure == null) {
									// the mock response for the item is success
									mockResponses[i] = new BulkItemResponse(i, "opType", mock(ActionResponse.class));
								} else {
									// the mock response for the item is failure
									mockResponses[i] = new BulkItemResponse(i, "opType", new BulkItemResponse.Failure("index", "type", "id", mockItemFailure));
								}
							}

							listener.afterBulk(123L, currentBulkRequest, new BulkResponse(mockResponses, 1000L));
						} else {
							listener.afterBulk(123L, currentBulkRequest, nextBulkFailure);
						}
					}

					return null;
				}
			}).when(mockBulkProcessor).flush();

			return mockBulkProcessor;
		}
	}

	private static class DummyElasticsearchApiCallBridge implements ElasticsearchApiCallBridge {

		private static final long serialVersionUID = -4272760730959041699L;

		@Override
		public Client createClient(Map<String, String> clientConfig) {
			return mock(Client.class);
		}

		@Nullable
		@Override
		public Throwable extractFailureCauseFromBulkItemResponse(BulkItemResponse bulkItemResponse) {
			if (bulkItemResponse.isFailed()) {
				return new Exception(bulkItemResponse.getFailure().getMessage());
			} else {
				return null;
			}
		}

		@Override
		public void configureBulkProcessorBackoff(BulkProcessor.Builder builder, @Nullable ElasticsearchSinkBase.BulkFlushBackoffPolicy flushBackoffPolicy) {
			// no need for this in the test cases here
		}

		@Override
		public void cleanup() {
			// nothing to cleanup
		}
	}
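
	// Note: the type parameter below is literally named "String" and shadows java.lang.String,
	// which is why the method body must spell out java.lang.String for the Map's key type.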
	private static class SimpleSinkFunction<String> implements ElasticsearchSinkFunction<String> {

		private static final long serialVersionUID = -176739293659135148L;

		@Override
		public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
			Map<java.lang.String, Object> json = new HashMap<>();
			json.put("data", element);

			indexer.add(
				Requests.indexRequest()
					.index("index")
					.type("type")
					.id("id")
					.source(json)
			);
		}
	}

	private static class DummyRetryFailureHandler implements ActionRequestFailureHandler {

		private static final long serialVersionUID = 5400023700099200745L;

		@Override
		public void onFailure(ActionRequest action, Throwable failure, int restStatusCode, RequestIndexer indexer) throws Throwable {
			indexer.add(action);
		}
	}
}