// Copyright 2016 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // //////////////////////////////////////////////////////////////////////////////// package com.google.pubsub.kafka.source; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.mockito.Matchers.any; import static org.mockito.Mockito.RETURNS_DEEP_STUBS; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.protobuf.ByteString; import com.google.protobuf.Empty; import com.google.pubsub.kafka.common.ConnectorUtils; import com.google.pubsub.v1.AcknowledgeRequest; import com.google.pubsub.v1.PubsubMessage; import com.google.pubsub.v1.PullRequest; import com.google.pubsub.v1.PullResponse; import com.google.pubsub.v1.ReceivedMessage; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.kafka.connect.data.Field; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaBuilder; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.source.SourceRecord; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** Tests for {@link CloudPubSubSourceTask}. */ public class CloudPubSubSourceTaskTest { private static final Logger log = LoggerFactory.getLogger(CloudPubSubSourceTaskTest.class); private static final String CPS_PROJECT = "the"; private static final String CPS_MAX_BATCH_SIZE = "1000"; private static final String CPS_SUBSCRIPTION = "quick"; private static final String KAFKA_TOPIC = "brown"; private static final String KAFKA_MESSAGE_KEY_ATTRIBUTE = "fox"; private static final String KAFKA_MESSAGE_KEY_ATTRIBUTE_VALUE = "jumped"; private static final String KAFKA_PARTITIONS = "3"; private static final ByteString CPS_MESSAGE = ByteString.copyFromUtf8("over"); private static final byte[] KAFKA_VALUE = CPS_MESSAGE.toByteArray(); private static final String ACK_ID1 = "ackID1"; private static final String ACK_ID2 = "ackID2"; private static final String ACK_ID3 = "ackID3"; private static final String ACK_ID4 = "ackID4"; private CloudPubSubSourceTask task; private Map<String, String> props; private CloudPubSubSubscriber subscriber; /** * Compare two SourceRecords. This is necessary because the records' values contain a byte[] and * the .equals on a SourceRecord does not take this into account. */ public void assertRecordsEqual(SourceRecord sr1, SourceRecord sr2) { assertEquals(sr1.key(), sr2.key()); assertEquals(sr1.keySchema(), sr2.keySchema()); assertEquals(sr1.valueSchema(), sr2.valueSchema()); assertEquals(sr1.topic(), sr2.topic()); if (sr1.valueSchema() == Schema.BYTES_SCHEMA) { assertArrayEquals((byte[])sr1.value(), (byte[])sr2.value()); } else { for(Field f : sr1.valueSchema().fields()) { if (f.name().equals(ConnectorUtils.KAFKA_MESSAGE_CPS_BODY_FIELD)) { assertArrayEquals(((Struct)sr1.value()).getBytes(f.name()), ((Struct)sr2.value()).getBytes(f.name())); } else { assertEquals(((Struct)sr1.value()).getString(f.name()), ((Struct)sr2.value()).getString(f.name())); } } } } @Before public void setup() { subscriber = mock(CloudPubSubSubscriber.class, RETURNS_DEEP_STUBS); task = new CloudPubSubSourceTask(subscriber); props = new HashMap<>(); props.put(ConnectorUtils.CPS_PROJECT_CONFIG, CPS_PROJECT); props.put(CloudPubSubSourceConnector.CPS_MAX_BATCH_SIZE_CONFIG, CPS_MAX_BATCH_SIZE); props.put(CloudPubSubSourceConnector.CPS_SUBSCRIPTION_CONFIG, CPS_SUBSCRIPTION); props.put(CloudPubSubSourceConnector.KAFKA_TOPIC_CONFIG, KAFKA_TOPIC); props.put(CloudPubSubSourceConnector.KAFKA_MESSAGE_KEY_CONFIG, KAFKA_MESSAGE_KEY_ATTRIBUTE); props.put(CloudPubSubSourceConnector.KAFKA_PARTITIONS_CONFIG, KAFKA_PARTITIONS); props.put( CloudPubSubSourceConnector.KAFKA_PARTITION_SCHEME_CONFIG, CloudPubSubSourceConnector.PartitionScheme.ROUND_ROBIN.toString()); } /** Tests when no messages are received from the Cloud Pub/Sub PullResponse. */ @Test public void testPollCaseWithNoMessages() throws Exception { task.start(props); PullResponse stubbedPullResponse = PullResponse.newBuilder().build(); when(subscriber.pull(any(PullRequest.class)).get()).thenReturn(stubbedPullResponse); assertEquals(0, task.poll().size()); verify(subscriber, never()).ackMessages(any(AcknowledgeRequest.class)); } /** * Tests that when ackMessages() succeeds and the subsequent call to poll() has no messages, that * the subscriber does not invoke ackMessages because there should be no acks. */ @Test public void testPollInRegularCase() throws Exception { task.start(props); ReceivedMessage rm1 = createReceivedMessage(ACK_ID1, CPS_MESSAGE, new HashMap<String, String>()); PullResponse stubbedPullResponse = PullResponse.newBuilder().addReceivedMessages(rm1).build(); when(subscriber.pull(any(PullRequest.class)).get()).thenReturn(stubbedPullResponse); List<SourceRecord> result = task.poll(); assertEquals(1, result.size()); stubbedPullResponse = PullResponse.newBuilder().build(); ListenableFuture<Empty> goodFuture = Futures.immediateFuture(Empty.getDefaultInstance()); when(subscriber.ackMessages(any(AcknowledgeRequest.class))).thenReturn(goodFuture); when(subscriber.pull(any(PullRequest.class)).get()).thenReturn(stubbedPullResponse); result = task.poll(); assertEquals(0, result.size()); result = task.poll(); assertEquals(0, result.size()); verify(subscriber, times(1)).ackMessages(any(AcknowledgeRequest.class)); } /** * Tests that when a call to ackMessages() fails, that the message is not sent again to Kafka if * the message is received again by Cloud Pub/Sub. Also tests that ack ids are added properly if * the ack id has not been seen before. */ @Test public void testPollWithDuplicateReceivedMessages() throws Exception { task.start(props); ReceivedMessage rm1 = createReceivedMessage(ACK_ID1, CPS_MESSAGE, new HashMap<String, String>()); PullResponse stubbedPullResponse = PullResponse.newBuilder().addReceivedMessages(rm1).build(); when(subscriber.pull(any(PullRequest.class)).get()).thenReturn(stubbedPullResponse); List<SourceRecord> result = task.poll(); assertEquals(1, result.size()); ReceivedMessage rm2 = createReceivedMessage(ACK_ID2, CPS_MESSAGE, new HashMap<String, String>()); stubbedPullResponse = PullResponse.newBuilder().addReceivedMessages(0, rm1).addReceivedMessages(1, rm2).build(); when(subscriber.pull(any(PullRequest.class)).get()).thenReturn(stubbedPullResponse); ListenableFuture<Empty> failedFuture = Futures.immediateFailedFuture(new Throwable()); when(subscriber.ackMessages(any(AcknowledgeRequest.class))).thenReturn(failedFuture); result = task.poll(); assertEquals(1, result.size()); verify(subscriber, times(1)).ackMessages(any(AcknowledgeRequest.class)); } /** * Tests when the message(s) retrieved from Cloud Pub/Sub do not have an attribute that matches * {@link #KAFKA_MESSAGE_KEY_ATTRIBUTE}. */ @Test public void testPollWithNoMessageKeyAttribute() throws Exception { task.start(props); ReceivedMessage rm = createReceivedMessage(ACK_ID1, CPS_MESSAGE, new HashMap<String, String>()); PullResponse stubbedPullResponse = PullResponse.newBuilder().addReceivedMessages(rm).build(); when(subscriber.pull(any(PullRequest.class)).get()).thenReturn(stubbedPullResponse); List<SourceRecord> result = task.poll(); verify(subscriber, never()).ackMessages(any(AcknowledgeRequest.class)); assertEquals(1, result.size()); SourceRecord expected = new SourceRecord( null, null, KAFKA_TOPIC, 0, Schema.OPTIONAL_STRING_SCHEMA, null, Schema.BYTES_SCHEMA, KAFKA_VALUE); assertRecordsEqual(expected, result.get(0)); } /** * Tests when the message(s) retrieved from Cloud Pub/Sub do have an attribute that matches {@link * #KAFKA_MESSAGE_KEY_ATTRIBUTE}. */ @Test public void testPollWithMessageKeyAttribute() throws Exception { task.start(props); Map<String, String> attributes = new HashMap<>(); attributes.put(KAFKA_MESSAGE_KEY_ATTRIBUTE, KAFKA_MESSAGE_KEY_ATTRIBUTE_VALUE); ReceivedMessage rm = createReceivedMessage(ACK_ID1, CPS_MESSAGE, attributes); PullResponse stubbedPullResponse = PullResponse.newBuilder().addReceivedMessages(rm).build(); when(subscriber.pull(any(PullRequest.class)).get()).thenReturn(stubbedPullResponse); List<SourceRecord> result = task.poll(); verify(subscriber, never()).ackMessages(any(AcknowledgeRequest.class)); assertEquals(1, result.size()); SourceRecord expected = new SourceRecord( null, null, KAFKA_TOPIC, 0, Schema.OPTIONAL_STRING_SCHEMA, KAFKA_MESSAGE_KEY_ATTRIBUTE_VALUE, Schema.BYTES_SCHEMA, KAFKA_VALUE); assertRecordsEqual(expected, result.get(0)); } /** * Tests when the message retrieved from Cloud Pub/Sub have several attributes, including * one that matches {@link #KAFKA_MESSAGE_KEY_ATTRIBUTE}. */ @Test public void testPollWithMultipleAttributes() throws Exception { task.start(props); Map<String, String> attributes = new HashMap<>(); attributes.put(KAFKA_MESSAGE_KEY_ATTRIBUTE, KAFKA_MESSAGE_KEY_ATTRIBUTE_VALUE); attributes.put("attribute1", "attribute_value1"); attributes.put("attribute2", "attribute_value2"); ReceivedMessage rm = createReceivedMessage(ACK_ID1, CPS_MESSAGE, attributes); PullResponse stubbedPullResponse = PullResponse.newBuilder().addReceivedMessages(rm).build(); when(subscriber.pull(any(PullRequest.class)).get()).thenReturn(stubbedPullResponse); List<SourceRecord> result = task.poll(); verify(subscriber, never()).ackMessages(any(AcknowledgeRequest.class)); assertEquals(1, result.size()); Schema expectedSchema = SchemaBuilder.struct() .field(ConnectorUtils.KAFKA_MESSAGE_CPS_BODY_FIELD, Schema.BYTES_SCHEMA) .field("attribute1", Schema.STRING_SCHEMA) .field("attribute2", Schema.STRING_SCHEMA) .build(); Struct expectedValue = new Struct(expectedSchema) .put(ConnectorUtils.KAFKA_MESSAGE_CPS_BODY_FIELD, KAFKA_VALUE) .put("attribute1", "attribute_value1") .put("attribute2", "attribute_value2"); SourceRecord expected = new SourceRecord( null, null, KAFKA_TOPIC, 0, Schema.OPTIONAL_STRING_SCHEMA, KAFKA_MESSAGE_KEY_ATTRIBUTE_VALUE, expectedSchema, expectedValue); assertRecordsEqual(expected, result.get(0)); } /** * Tests that the correct partition is assigned when the partition scheme is "hash_key". The test * has two cases, one where a key does exist and one where it does not. */ @Test public void testPollWithPartitionSchemeHashKey() throws Exception { props.put( CloudPubSubSourceConnector.KAFKA_PARTITION_SCHEME_CONFIG, CloudPubSubSourceConnector.PartitionScheme.HASH_KEY.toString()); task.start(props); Map<String, String> attributes = new HashMap<>(); attributes.put(KAFKA_MESSAGE_KEY_ATTRIBUTE, KAFKA_MESSAGE_KEY_ATTRIBUTE_VALUE); ReceivedMessage withoutKey = createReceivedMessage(ACK_ID1, CPS_MESSAGE, new HashMap<String, String>()); ReceivedMessage withKey = createReceivedMessage(ACK_ID2, CPS_MESSAGE, attributes); PullResponse stubbedPullResponse = PullResponse.newBuilder() .addReceivedMessages(0, withKey) .addReceivedMessages(1, withoutKey) .build(); when(subscriber.pull(any(PullRequest.class)).get()).thenReturn(stubbedPullResponse); List<SourceRecord> result = task.poll(); verify(subscriber, never()).ackMessages(any(AcknowledgeRequest.class)); assertEquals(2, result.size()); SourceRecord expectedForMessageWithKey = new SourceRecord( null, null, KAFKA_TOPIC, KAFKA_MESSAGE_KEY_ATTRIBUTE_VALUE.hashCode() % Integer.parseInt(KAFKA_PARTITIONS), Schema.OPTIONAL_STRING_SCHEMA, KAFKA_MESSAGE_KEY_ATTRIBUTE_VALUE, Schema.BYTES_SCHEMA, KAFKA_VALUE); SourceRecord expectedForMessageWithoutKey = new SourceRecord( null, null, KAFKA_TOPIC, 0, Schema.OPTIONAL_STRING_SCHEMA, null, Schema.BYTES_SCHEMA, KAFKA_VALUE); assertRecordsEqual(expectedForMessageWithKey, result.get(0)); assertArrayEquals((byte[])expectedForMessageWithoutKey.value(), (byte[])result.get(1).value()); } /** Tests that the correct partition is assigned when the partition scheme is "hash_value". */ @Test public void testPollWithPartitionSchemeHashValue() throws Exception { props.put( CloudPubSubSourceConnector.KAFKA_PARTITION_SCHEME_CONFIG, CloudPubSubSourceConnector.PartitionScheme.HASH_VALUE.toString()); task.start(props); ReceivedMessage rm = createReceivedMessage(ACK_ID1, CPS_MESSAGE, new HashMap<String, String>()); PullResponse stubbedPullResponse = PullResponse.newBuilder().addReceivedMessages(rm).build(); when(subscriber.pull(any(PullRequest.class)).get()).thenReturn(stubbedPullResponse); List<SourceRecord> result = task.poll(); verify(subscriber, never()).ackMessages(any(AcknowledgeRequest.class)); assertEquals(1, result.size()); SourceRecord expected = new SourceRecord( null, null, KAFKA_TOPIC, KAFKA_VALUE.hashCode() % Integer.parseInt(KAFKA_PARTITIONS), Schema.OPTIONAL_STRING_SCHEMA, null, Schema.BYTES_SCHEMA, KAFKA_VALUE); assertRecordsEqual(expected, result.get(0)); } /** * Tests that the correct partition is assigned when the partition scheme is "round_robin". The * tests makes sure to submit an approrpriate number of messages to poll() so that all partitions * in the round robin are hit once. */ @Test public void testPollWithPartitionSchemeRoundRobin() throws Exception { task.start(props); ReceivedMessage rm1 = createReceivedMessage(ACK_ID1, CPS_MESSAGE, new HashMap<String, String>()); ReceivedMessage rm2 = createReceivedMessage(ACK_ID2, CPS_MESSAGE, new HashMap<String, String>()); ReceivedMessage rm3 = createReceivedMessage(ACK_ID3, CPS_MESSAGE, new HashMap<String, String>()); ReceivedMessage rm4 = createReceivedMessage(ACK_ID4, CPS_MESSAGE, new HashMap<String, String>()); PullResponse stubbedPullResponse = PullResponse.newBuilder() .addReceivedMessages(0, rm1) .addReceivedMessages(1, rm2) .addReceivedMessages(2, rm3) .addReceivedMessages(3, rm4) .build(); when(subscriber.pull(any(PullRequest.class)).get()).thenReturn(stubbedPullResponse); List<SourceRecord> result = task.poll(); verify(subscriber, never()).ackMessages(any(AcknowledgeRequest.class)); assertEquals(4, result.size()); SourceRecord expected1 = new SourceRecord( null, null, KAFKA_TOPIC, 0, Schema.OPTIONAL_STRING_SCHEMA, null, Schema.BYTES_SCHEMA, KAFKA_VALUE); SourceRecord expected2 = new SourceRecord( null, null, KAFKA_TOPIC, 1, Schema.OPTIONAL_STRING_SCHEMA, null, Schema.BYTES_SCHEMA, KAFKA_VALUE); SourceRecord expected3 = new SourceRecord( null, null, KAFKA_TOPIC, 2, Schema.OPTIONAL_STRING_SCHEMA, null, Schema.BYTES_SCHEMA, KAFKA_VALUE); SourceRecord expected4 = new SourceRecord( null, null, KAFKA_TOPIC, 0, Schema.OPTIONAL_STRING_SCHEMA, null, Schema.BYTES_SCHEMA, KAFKA_VALUE); assertRecordsEqual(expected1, result.get(0)); assertRecordsEqual(expected2, result.get(1)); assertRecordsEqual(expected3, result.get(2)); assertRecordsEqual(expected4, result.get(3)); } @Test(expected = RuntimeException.class) public void testPollExceptionCase() throws Exception { task.start(props); // Could also throw ExecutionException if we wanted to... when(subscriber.pull(any(PullRequest.class)).get()).thenThrow(new InterruptedException()); task.poll(); } private ReceivedMessage createReceivedMessage( String ackId, ByteString data, Map<String, String> attributes) { PubsubMessage message = PubsubMessage.newBuilder().setData(data).putAllAttributes(attributes).build(); return ReceivedMessage.newBuilder().setAckId(ackId).setMessage(message).build(); } }