/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.integration;
import kafka.utils.MockTime;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
import org.apache.kafka.streams.kstream.KStreamBuilder;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.ValueJoiner;
import org.apache.kafka.streams.state.KeyValueIterator;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
import org.apache.kafka.test.IntegrationTest;
import org.apache.kafka.test.TestUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
@Category({IntegrationTest.class})
public class KTableKTableJoinIntegrationTest {
private final static int NUM_BROKERS = 1;
@ClassRule
public final static EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
private final static MockTime MOCK_TIME = CLUSTER.time;
private final static String TABLE_1 = "table1";
private final static String TABLE_2 = "table2";
private final static String TABLE_3 = "table3";
private final static String OUTPUT = "output-";
private static Properties streamsConfig;
private KafkaStreams streams;
private final static Properties CONSUMER_CONFIG = new Properties();
@BeforeClass
public static void beforeTest() throws Exception {
CLUSTER.createTopic(TABLE_1);
CLUSTER.createTopic(TABLE_2);
CLUSTER.createTopic(TABLE_3);
CLUSTER.createTopic(OUTPUT);
streamsConfig = new Properties();
streamsConfig.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
streamsConfig.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfig.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
streamsConfig.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath());
streamsConfig.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
streamsConfig.put(IntegrationTestUtils.INTERNAL_LEAVE_GROUP_ON_CLOSE, true);
streamsConfig.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 100);
final Properties producerConfig = new Properties();
producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
final List<KeyValue<String, String>> table1 = Arrays.asList(
new KeyValue<>("a", "A1"),
new KeyValue<>("b", "B1")
);
final List<KeyValue<String, String>> table2 = Arrays.asList(
new KeyValue<>("b", "B2"),
new KeyValue<>("c", "C2")
);
final List<KeyValue<String, String>> table3 = Arrays.asList(
new KeyValue<>("a", "A3"),
new KeyValue<>("b", "B3"),
new KeyValue<>("c", "C3")
);
// put table 3 first, to make sure data is there when joining T1 with T2
IntegrationTestUtils.produceKeyValuesSynchronously(TABLE_3, table3, producerConfig, MOCK_TIME);
IntegrationTestUtils.produceKeyValuesSynchronously(TABLE_1, table1, producerConfig, MOCK_TIME);
IntegrationTestUtils.produceKeyValuesSynchronously(TABLE_2, table2, producerConfig, MOCK_TIME);
CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, "ktable-ktable-consumer");
CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
}
@Before
public void before() throws Exception {
IntegrationTestUtils.purgeLocalStreamsState(streamsConfig);
}
@After
public void after() throws Exception {
if (streams != null) {
streams.close();
streams = null;
}
IntegrationTestUtils.purgeLocalStreamsState(streamsConfig);
}
private enum JoinType {
INNER, LEFT, OUTER
}
@Test
public void shouldInnerInnerJoin() throws Exception {
verifyKTableKTableJoin(JoinType.INNER, JoinType.INNER, Collections.singletonList(new KeyValue<>("b", "B1-B2-B3")), false);
}
@Test
public void shouldInnerInnerJoinQueryable() throws Exception {
verifyKTableKTableJoin(JoinType.INNER, JoinType.INNER, Collections.singletonList(new KeyValue<>("b", "B1-B2-B3")), true);
}
@Test
public void shouldInnerLeftJoin() throws Exception {
verifyKTableKTableJoin(JoinType.INNER, JoinType.LEFT, Collections.singletonList(new KeyValue<>("b", "B1-B2-B3")), false);
}
@Test
public void shouldInnerLeftJoinQueryable() throws Exception {
verifyKTableKTableJoin(JoinType.INNER, JoinType.LEFT, Collections.singletonList(new KeyValue<>("b", "B1-B2-B3")), true);
}
@Test
public void shouldInnerOuterJoin() throws Exception {
verifyKTableKTableJoin(JoinType.INNER, JoinType.OUTER, Arrays.asList(
new KeyValue<>("a", "null-A3"),
new KeyValue<>("b", "null-B3"),
new KeyValue<>("c", "null-C3"),
new KeyValue<>("b", "B1-B2-B3")), false);
}
@Test
public void shouldInnerOuterJoinQueryable() throws Exception {
verifyKTableKTableJoin(JoinType.INNER, JoinType.OUTER, Arrays.asList(
new KeyValue<>("a", "null-A3"),
new KeyValue<>("b", "null-B3"),
new KeyValue<>("c", "null-C3"),
new KeyValue<>("b", "B1-B2-B3")), true);
}
@Test
public void shouldLeftInnerJoin() throws Exception {
verifyKTableKTableJoin(JoinType.LEFT, JoinType.INNER, Arrays.asList(
new KeyValue<>("a", "A1-null-A3"),
new KeyValue<>("b", "B1-null-B3"),
new KeyValue<>("b", "B1-B2-B3")), false);
}
@Test
public void shouldLeftInnerJoinQueryable() throws Exception {
verifyKTableKTableJoin(JoinType.LEFT, JoinType.INNER, Arrays.asList(
new KeyValue<>("a", "A1-null-A3"),
new KeyValue<>("b", "B1-null-B3"),
new KeyValue<>("b", "B1-B2-B3")), true);
}
@Test
public void shouldLeftLeftJoin() throws Exception {
verifyKTableKTableJoin(JoinType.LEFT, JoinType.LEFT, Arrays.asList(
new KeyValue<>("a", "A1-null-A3"),
new KeyValue<>("b", "B1-null-B3"),
new KeyValue<>("b", "B1-B2-B3")), false);
}
@Test
public void shouldLeftLeftJoinQueryable() throws Exception {
verifyKTableKTableJoin(JoinType.LEFT, JoinType.LEFT, Arrays.asList(
new KeyValue<>("a", "A1-null-A3"),
new KeyValue<>("b", "B1-null-B3"),
new KeyValue<>("b", "B1-B2-B3")), true);
}
@Test
public void shouldLeftOuterJoin() throws Exception {
verifyKTableKTableJoin(JoinType.LEFT, JoinType.OUTER, Arrays.asList(
new KeyValue<>("a", "null-A3"),
new KeyValue<>("b", "null-B3"),
new KeyValue<>("c", "null-C3"),
new KeyValue<>("a", "A1-null-A3"),
new KeyValue<>("b", "B1-null-B3"),
new KeyValue<>("b", "B1-B2-B3")), false);
}
@Test
public void shouldLeftOuterJoinQueryable() throws Exception {
verifyKTableKTableJoin(JoinType.LEFT, JoinType.OUTER, Arrays.asList(
new KeyValue<>("a", "null-A3"),
new KeyValue<>("b", "null-B3"),
new KeyValue<>("c", "null-C3"),
new KeyValue<>("a", "A1-null-A3"),
new KeyValue<>("b", "B1-null-B3"),
new KeyValue<>("b", "B1-B2-B3")), true);
}
@Test
public void shouldOuterInnerJoin() throws Exception {
verifyKTableKTableJoin(JoinType.OUTER, JoinType.INNER, Arrays.asList(
new KeyValue<>("a", "A1-null-A3"),
new KeyValue<>("b", "B1-null-B3"),
new KeyValue<>("b", "B1-B2-B3"),
new KeyValue<>("c", "null-C2-C3")), false);
}
@Test
public void shouldOuterInnerJoinQueryable() throws Exception {
verifyKTableKTableJoin(JoinType.OUTER, JoinType.INNER, Arrays.asList(
new KeyValue<>("a", "A1-null-A3"),
new KeyValue<>("b", "B1-null-B3"),
new KeyValue<>("b", "B1-B2-B3"),
new KeyValue<>("c", "null-C2-C3")), true);
}
@Test
public void shouldOuterLeftJoin() throws Exception {
verifyKTableKTableJoin(JoinType.OUTER, JoinType.LEFT, Arrays.asList(
new KeyValue<>("a", "A1-null-A3"),
new KeyValue<>("b", "B1-null-B3"),
new KeyValue<>("b", "B1-B2-B3"),
new KeyValue<>("c", "null-C2-C3")), false);
}
@Test
public void shouldOuterLeftJoinQueryable() throws Exception {
verifyKTableKTableJoin(JoinType.OUTER, JoinType.LEFT, Arrays.asList(
new KeyValue<>("a", "A1-null-A3"),
new KeyValue<>("b", "B1-null-B3"),
new KeyValue<>("b", "B1-B2-B3"),
new KeyValue<>("c", "null-C2-C3")), true);
}
@Test
public void shouldOuterOuterJoin() throws Exception {
verifyKTableKTableJoin(JoinType.OUTER, JoinType.OUTER, Arrays.asList(
new KeyValue<>("a", "null-A3"),
new KeyValue<>("b", "null-B3"),
new KeyValue<>("c", "null-C3"),
new KeyValue<>("a", "A1-null-A3"),
new KeyValue<>("b", "B1-null-B3"),
new KeyValue<>("b", "B1-B2-B3"),
new KeyValue<>("c", "null-C2-C3")), false);
}
@Test
public void shouldOuterOuterJoinQueryable() throws Exception {
verifyKTableKTableJoin(JoinType.OUTER, JoinType.OUTER, Arrays.asList(
new KeyValue<>("a", "null-A3"),
new KeyValue<>("b", "null-B3"),
new KeyValue<>("c", "null-C3"),
new KeyValue<>("a", "A1-null-A3"),
new KeyValue<>("b", "B1-null-B3"),
new KeyValue<>("b", "B1-B2-B3"),
new KeyValue<>("c", "null-C2-C3")), true);
}
private void verifyKTableKTableJoin(final JoinType joinType1,
final JoinType joinType2,
final List<KeyValue<String, String>> expectedResult,
boolean verifyQueryableState) throws Exception {
final String queryableName = verifyQueryableState ? joinType1 + "-" + joinType2 + "-ktable-ktable-join-query" : null;
streamsConfig.put(StreamsConfig.APPLICATION_ID_CONFIG, joinType1 + "-" + joinType2 + "-ktable-ktable-join" + queryableName);
streams = prepareTopology(joinType1, joinType2, queryableName);
streams.start();
final List<KeyValue<String, String>> result = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
CONSUMER_CONFIG,
OUTPUT,
expectedResult.size());
assertThat(result, equalTo(expectedResult));
if (verifyQueryableState) {
verifyKTableKTableJoinQueryableState(joinType1, joinType2, expectedResult);
}
}
private void verifyKTableKTableJoinQueryableState(final JoinType joinType1,
final JoinType joinType2,
final List<KeyValue<String, String>> expectedResult) {
final String queryableName = joinType1 + "-" + joinType2 + "-ktable-ktable-join-query";
final ReadOnlyKeyValueStore<String, String> myJoinStore = streams.store(queryableName,
QueryableStoreTypes.<String, String>keyValueStore());
// store only keeps last set of values, not entire stream of value changes
final Map<String, String> expectedInStore = new HashMap<>();
for (KeyValue<String, String> expected : expectedResult) {
expectedInStore.put(expected.key, expected.value);
}
for (Map.Entry<String, String> expected : expectedInStore.entrySet()) {
assertEquals(expected.getValue(), myJoinStore.get(expected.getKey()));
}
final KeyValueIterator<String, String> all = myJoinStore.all();
while (all.hasNext()) {
KeyValue<String, String> storeEntry = all.next();
assertTrue(expectedResult.contains(storeEntry));
}
all.close();
}
private KafkaStreams prepareTopology(final JoinType joinType1, final JoinType joinType2, final String queryableName) {
final KStreamBuilder builder = new KStreamBuilder();
final KTable<String, String> table1 = builder.table(TABLE_1, TABLE_1);
final KTable<String, String> table2 = builder.table(TABLE_2, TABLE_2);
final KTable<String, String> table3 = builder.table(TABLE_3, TABLE_3);
join(join(table1, table2, joinType1, null /* no need to query intermediate result */), table3,
joinType2, queryableName).to(OUTPUT);
return new KafkaStreams(builder, new StreamsConfig(streamsConfig));
}
private KTable<String, String> join(final KTable<String, String> first,
final KTable<String, String> second,
final JoinType joinType,
final String queryableName) {
final ValueJoiner<String, String, String> joiner = new ValueJoiner<String, String, String>() {
@Override
public String apply(final String value1, final String value2) {
return value1 + "-" + value2;
}
};
switch (joinType) {
case INNER:
return first.join(second, joiner, Serdes.String(), queryableName);
case LEFT:
return first.leftJoin(second, joiner, Serdes.String(), queryableName);
case OUTER:
return first.outerJoin(second, joiner, Serdes.String(), queryableName);
}
throw new RuntimeException("Unknown join type.");
}
}