/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.test;

import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.MockConsumer;
import org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.apache.kafka.clients.producer.MockProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.metrics.Metrics;
import org.apache.kafka.common.record.TimestampType;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.common.utils.MockTime;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.StreamsMetrics;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.TaskId;
import org.apache.kafka.streams.processor.TopologyBuilder;
import org.apache.kafka.streams.processor.internals.GlobalProcessorContextImpl;
import org.apache.kafka.streams.processor.internals.GlobalStateManagerImpl;
import org.apache.kafka.streams.processor.internals.GlobalStateUpdateTask;
import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
import org.apache.kafka.streams.processor.internals.MockStreamsMetrics;
import org.apache.kafka.streams.processor.internals.ProcessorContextImpl;
import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
import org.apache.kafka.streams.processor.internals.ProcessorTopology;
import org.apache.kafka.streams.processor.internals.StateDirectory;
import org.apache.kafka.streams.processor.internals.StoreChangelogReader;
import org.apache.kafka.streams.processor.internals.StreamTask;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.internals.ThreadCache;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;

/**
 * This class makes it easier to write tests to verify the behavior of topologies created with a {@link TopologyBuilder}.
 * You can test simple topologies that have a single processor, or very complex topologies that have multiple sources,
 * processors, and sinks.
 * And because it starts with a {@link TopologyBuilder}, you can create topologies specific to your tests or you
 * can use and test code you already have that uses a builder to create topologies. Best of all, the class works without a real
 * Kafka broker, so the tests execute very quickly with very little overhead.
 * <p>
 * Using the ProcessorTopologyTestDriver in tests is easy: simply instantiate the driver with a {@link StreamsConfig} and a
 * TopologyBuilder, use the driver to supply an input message to the topology, and then use the driver to read and verify any
 * messages output by the topology.
 * <p>
 * Although the driver doesn't use a real Kafka broker, it does simulate Kafka {@link org.apache.kafka.clients.consumer.Consumer}s
 * and {@link org.apache.kafka.clients.producer.Producer}s that read and write raw {@code byte[]} messages. You can either deal
 * with messages that have {@code byte[]} keys and values, or you can supply the {@link Serializer}s and {@link Deserializer}s
 * that the driver can use to convert the keys and values into objects.
 *
 * <h2>Driver setup</h2>
 * <p>
 * In order to create a ProcessorTopologyTestDriver instance, you need a TopologyBuilder and a {@link StreamsConfig}. The
 * configuration needs to be representative of what you'd supply to the real topology, so that means including several key
 * properties. For example, the following code fragment creates a configuration that specifies a local Kafka broker list
 * (which is needed but not used), a timestamp extractor, and default serializers and deserializers for string keys and values:
 *
 * <pre>
 * StringSerializer strSerializer = new StringSerializer();
 * StringDeserializer strDeserializer = new StringDeserializer();
 * Properties props = new Properties();
 * props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9091");
 * props.setProperty(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, CustomTimestampExtractor.class.getName());
 * props.setProperty(StreamsConfig.KEY_SERIALIZER_CLASS_CONFIG, strSerializer.getClass().getName());
 * props.setProperty(StreamsConfig.KEY_DESERIALIZER_CLASS_CONFIG, strDeserializer.getClass().getName());
 * props.setProperty(StreamsConfig.VALUE_SERIALIZER_CLASS_CONFIG, strSerializer.getClass().getName());
 * props.setProperty(StreamsConfig.VALUE_DESERIALIZER_CLASS_CONFIG, strDeserializer.getClass().getName());
 * StreamsConfig config = new StreamsConfig(props);
 * TopologyBuilder builder = ...
 * ProcessorTopologyTestDriver driver = new ProcessorTopologyTestDriver(config, builder);
 * </pre>
 *
 * <h2>Processing messages</h2>
 * <p>
 * Your test can supply new input records on any of the topics that the topology's sources consume. Here's an example of an
 * input message on the topic named {@code input-topic}:
 *
 * <pre>
 * driver.process("input-topic", "key1", "value1", strSerializer, strSerializer);
 * </pre>
 *
 * Immediately, the driver will pass the input message through to the appropriate source that consumes the named topic,
 * and will invoke the processor(s) downstream of the source. If your topology's processors forward messages to sinks,
 * your test can then consume these output messages to verify they match the expected outcome.
 * For example, if our topology
 * should have generated 2 messages on {@code output-topic-1} and 1 message on {@code output-topic-2}, then our test can
 * obtain these messages using the {@link #readOutput(String, Deserializer, Deserializer)} method:
 *
 * <pre>
 * ProducerRecord<String, String> record1 = driver.readOutput("output-topic-1", strDeserializer, strDeserializer);
 * ProducerRecord<String, String> record2 = driver.readOutput("output-topic-1", strDeserializer, strDeserializer);
 * ProducerRecord<String, String> record3 = driver.readOutput("output-topic-2", strDeserializer, strDeserializer);
 * </pre>
 *
 * Again, our example topology generates messages with string keys and values, so we supply our string deserializer instance
 * for use on both the keys and values. Your test logic can then verify whether these output records are correct.
 * <p>
 * Finally, when completed, make sure your tests {@link #close()} the driver to release all resources and
 * {@link org.apache.kafka.streams.processor.Processor}s.
 *
 * <h2>Processor state</h2>
 * <p>
 * Some processors use Kafka {@link StateStore state storage}, so this driver class provides the {@link #getStateStore(String)}
 * and {@link #getKeyValueStore(String)} methods so that your tests can check the underlying state store(s) used by your
 * topology's processors. In our previous example, after we supplied a single input message and checked the three output messages,
 * our test could also check the key-value store to verify the processor correctly added, removed, or updated internal state.
 * Or, our test might have pre-populated some state <em>before</em> submitting the input message, and verified afterward that the
 * processor(s) correctly updated the state.
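 * <p>
 * Such a test might look like the following sketch, where the store name {@code my-store} and the expectation that the
 * processor increments a stored counter are hypothetical, for illustration only:
 *
 * <pre>
 * // Pre-populate the (hypothetical) key-value store before processing an input message ...
 * KeyValueStore<String, Long> store = driver.getKeyValueStore("my-store");
 * store.put("key1", 41L);
 *
 * // Process a message and verify that the processor updated the stored value ...
 * driver.process("input-topic", "key1", "value1", strSerializer, strSerializer);
 * assertEquals(Long.valueOf(42L), store.get("key1"));
 * </pre>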
 */
public class ProcessorTopologyTestDriver {

    private final static String APPLICATION_ID = "test-driver-application";
    private final static int PARTITION_ID = 0;
    private final static TaskId TASK_ID = new TaskId(0, PARTITION_ID);

    private final ProcessorTopology topology;
    private final MockProducer<byte[], byte[]> producer;
    private final Map<String, TopicPartition> partitionsByTopic = new HashMap<>();
    private final Map<TopicPartition, AtomicLong> offsetsByTopicPartition = new HashMap<>();
    private final Map<String, Queue<ProducerRecord<byte[], byte[]>>> outputRecordsByTopic = new HashMap<>();
    private final Set<String> internalTopics = new HashSet<>();
    private final Map<String, TopicPartition> globalPartitionsByTopic = new HashMap<>();
    private StreamTask task;
    private GlobalStateUpdateTask globalStateTask;

    /**
     * Create a new test driver instance.
     * @param config the stream configuration for the topology
     * @param builder the topology builder that will be used to create the topology instance
     */
    public ProcessorTopologyTestDriver(final StreamsConfig config,
                                       final TopologyBuilder builder) {
        topology = builder.setApplicationId(APPLICATION_ID).build(null);
        final ProcessorTopology globalTopology = builder.buildGlobalStateTopology();

        // Set up the consumer and producer ...
        final Consumer<byte[], byte[]> consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST);
        final Serializer<byte[]> bytesSerializer = new ByteArraySerializer();
        producer = new MockProducer<byte[], byte[]>(true, bytesSerializer, bytesSerializer) {
            @Override
            public List<PartitionInfo> partitionsFor(final String topic) {
                return Collections.singletonList(new PartitionInfo(topic, PARTITION_ID, null, null, null));
            }
        };

        // Identify internal topics for forwarding in process ...
        for (final TopologyBuilder.TopicsInfo topicsInfo : builder.topicGroups().values()) {
            internalTopics.addAll(topicsInfo.repartitionSourceTopics.keySet());
        }

        // Set up all of the topic+partition information and subscribe the consumer to each ...
        for (final String topic : topology.sourceTopics()) {
            final TopicPartition tp = new TopicPartition(topic, PARTITION_ID);
            partitionsByTopic.put(topic, tp);
            offsetsByTopicPartition.put(tp, new AtomicLong());
        }
        consumer.assign(offsetsByTopicPartition.keySet());

        final StateDirectory stateDirectory = new StateDirectory(APPLICATION_ID, TestUtils.tempDirectory().getPath(), Time.SYSTEM);
        final StreamsMetrics streamsMetrics = new MockStreamsMetrics(new Metrics());
        final ThreadCache cache = new ThreadCache("mock", 1024 * 1024, streamsMetrics);

        if (globalTopology != null) {
            final MockConsumer<byte[], byte[]> globalConsumer = createGlobalConsumer();
            for (final String topicName : globalTopology.sourceTopics()) {
                final List<PartitionInfo> partitionInfos = new ArrayList<>();
                partitionInfos.add(new PartitionInfo(topicName, 1, null, null, null));
                globalConsumer.updatePartitions(topicName, partitionInfos);
                final TopicPartition partition = new TopicPartition(topicName, 1);
                globalConsumer.updateEndOffsets(Collections.singletonMap(partition, 0L));
                globalPartitionsByTopic.put(topicName, partition);
                offsetsByTopicPartition.put(partition, new AtomicLong());
            }
            final GlobalStateManagerImpl stateManager = new GlobalStateManagerImpl(globalTopology, globalConsumer, stateDirectory);
            globalStateTask = new GlobalStateUpdateTask(globalTopology,
                                                        new GlobalProcessorContextImpl(config, stateManager, streamsMetrics, cache),
                                                        stateManager);
            globalStateTask.initialize();
        }

        if (!partitionsByTopic.isEmpty()) {
            task = new StreamTask(TASK_ID,
                                  APPLICATION_ID,
                                  partitionsByTopic.values(),
                                  topology,
                                  consumer,
                                  new StoreChangelogReader(createRestoreConsumer(topology.storeToChangelogTopic()), Time.SYSTEM, 5000),
                                  config,
                                  streamsMetrics,
                                  stateDirectory,
                                  cache,
                                  new MockTime(),
                                  producer);
        }
    }

    /**
     * Send an input message with the given key, value, and timestamp on the specified topic to the topology, and then commit the messages.
     *
     * @param topicName the name of the topic on which the message is to be sent
     * @param key       the raw message key
     * @param value     the raw message value
     * @param timestamp the raw message timestamp
     */
    private void process(final String topicName,
                         final byte[] key,
                         final byte[] value,
                         final long timestamp) {
        final TopicPartition tp = partitionsByTopic.get(topicName);
        if (tp != null) {
            // Add the record ...
            final long offset = offsetsByTopicPartition.get(tp).incrementAndGet();
            task.addRecords(tp, records(new ConsumerRecord<>(tp.topic(), tp.partition(), offset, timestamp, TimestampType.CREATE_TIME, 0L, 0, 0, key, value)));
            producer.clear();

            // Process the record ...
            task.process();
            ((InternalProcessorContext) task.context()).setRecordContext(new ProcessorRecordContext(timestamp, offset, tp.partition(), topicName));
            task.commit();

            // Capture all the records sent to the producer ...
            for (final ProducerRecord<byte[], byte[]> record : producer.history()) {
                Queue<ProducerRecord<byte[], byte[]>> outputRecords = outputRecordsByTopic.get(record.topic());
                if (outputRecords == null) {
                    outputRecords = new LinkedList<>();
                    outputRecordsByTopic.put(record.topic(), outputRecords);
                }
                outputRecords.add(record);

                // Forward back into the topology if the produced record is to an internal or a source topic ...
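                // (Note that this call is recursive: if the topology produces records to a topic that it also
                //  consumes, processing only terminates once the topology stops emitting records for that topic.)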
                if (internalTopics.contains(record.topic()) || topology.sourceTopics().contains(record.topic())) {
                    process(record.topic(), record.key(), record.value(), record.timestamp());
                }
            }
        } else {
            final TopicPartition global = globalPartitionsByTopic.get(topicName);
            if (global == null) {
                throw new IllegalArgumentException("Unexpected topic: " + topicName);
            }
            final long offset = offsetsByTopicPartition.get(global).incrementAndGet();
            globalStateTask.update(new ConsumerRecord<>(global.topic(), global.partition(), offset, timestamp, TimestampType.CREATE_TIME, 0L, 0, 0, key, value));
            globalStateTask.flushState();
        }
    }

    /**
     * Send an input message with the given key and value on the specified topic to the topology.
     * The message will be processed with a timestamp of {@code 0L}.
     *
     * @param topicName the name of the topic on which the message is to be sent
     * @param key       the raw message key
     * @param value     the raw message value
     */
    public void process(final String topicName,
                        final byte[] key,
                        final byte[] value) {
        process(topicName, key, value, 0L);
    }

    /**
     * Send an input message with the given key and value on the specified topic to the topology, first serializing
     * them with the given serializers.
     *
     * @param topicName       the name of the topic on which the message is to be sent
     * @param key             the raw message key
     * @param value           the raw message value
     * @param keySerializer   the serializer for the key
     * @param valueSerializer the serializer for the value
     */
    public <K, V> void process(final String topicName,
                               final K key,
                               final V value,
                               final Serializer<K> keySerializer,
                               final Serializer<V> valueSerializer) {
        process(topicName, keySerializer.serialize(topicName, key), valueSerializer.serialize(topicName, value));
    }

    /**
     * Read the next record from the given topic. These records were output by the topology during the previous calls to
     * {@link #process(String, byte[], byte[])}.
     *
     * @param topic the name of the topic
     * @return the next record on that topic, or null if there is no record available
     */
    public ProducerRecord<byte[], byte[]> readOutput(final String topic) {
        final Queue<ProducerRecord<byte[], byte[]>> outputRecords = outputRecordsByTopic.get(topic);
        if (outputRecords == null) {
            return null;
        }
        return outputRecords.poll();
    }

    /**
     * Read the next record from the given topic, deserializing its key and value with the given deserializers.
     * These records were output by the topology during the previous calls to {@link #process(String, byte[], byte[])}.
     *
     * @param topic             the name of the topic
     * @param keyDeserializer   the deserializer for the key type
     * @param valueDeserializer the deserializer for the value type
     * @return the next record on that topic, or null if there is no record available
     */
    public <K, V> ProducerRecord<K, V> readOutput(final String topic,
                                                  final Deserializer<K> keyDeserializer,
                                                  final Deserializer<V> valueDeserializer) {
        final ProducerRecord<byte[], byte[]> record = readOutput(topic);
        if (record == null) {
            return null;
        }
        final K key = keyDeserializer.deserialize(record.topic(), record.key());
        final V value = valueDeserializer.deserialize(record.topic(), record.value());
        return new ProducerRecord<>(record.topic(), record.partition(), record.timestamp(), key, value);
    }

    private Iterable<ConsumerRecord<byte[], byte[]>> records(final ConsumerRecord<byte[], byte[]> record) {
        return Collections.singleton(record);
    }

    /**
     * Get the {@link StateStore} with the given name. The name should have been supplied via
     * {@link #ProcessorTopologyTestDriver(StreamsConfig, TopologyBuilder) this object's constructor}, and is
     * presumed to be used by a Processor within the topology.
     * <p>
     * This is often useful in test cases to pre-populate the store before the test case instructs the topology to
     * {@link #process(String, byte[], byte[]) process an input message}, and/or to check the store afterward.
     *
     * @param name the name of the store
     * @return the state store, or null if no store has been registered with the given name
     * @see #getKeyValueStore(String)
     */
    public StateStore getStateStore(final String name) {
        return ((ProcessorContextImpl) task.context()).getStateMgr().getStore(name);
    }

    /**
     * Get the {@link KeyValueStore} with the given name. The name should have been supplied via
     * {@link #ProcessorTopologyTestDriver(StreamsConfig, TopologyBuilder) this object's constructor}, and is
     * presumed to be used by a Processor within the topology.
     * <p>
     * This is often useful in test cases to pre-populate the store before the test case instructs the topology to
     * {@link #process(String, byte[], byte[]) process an input message}, and/or to check the store afterward.
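     * <p>
     * For example (assuming a hypothetical store named {@code my-store} with {@code String} keys and {@code Long} values):
     * <pre>
     * KeyValueStore<String, Long> store = driver.getKeyValueStore("my-store");
     * store.put("key1", 41L);            // pre-populate before processing
     * Long updated = store.get("key1");  // inspect after processing
     * </pre>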
     *
     * @param name the name of the store
     * @return the key-value store, or null if no {@link KeyValueStore} has been registered with the given name
     * @see #getStateStore(String)
     */
    @SuppressWarnings("unchecked")
    public <K, V> KeyValueStore<K, V> getKeyValueStore(final String name) {
        final StateStore store = getStateStore(name);
        return store instanceof KeyValueStore ? (KeyValueStore<K, V>) store : null;
    }

    /**
     * Close the driver, its topology, and all processors.
     */
    public void close() {
        if (task != null) {
            task.close(true);
        }
        if (globalStateTask != null) {
            try {
                globalStateTask.close();
            } catch (final IOException e) {
                // ignore
            }
        }
    }

    /**
     * Utility method that creates the {@link MockConsumer} used for restoring state. No state is actually restored
     * by this driver unless this method is overridden to return a functional consumer.
     *
     * @param storeToChangelogTopic the map of the names of the stores to the changelog topics
     * @return the mock consumer; never null
     */
    private MockConsumer<byte[], byte[]> createRestoreConsumer(final Map<String, String> storeToChangelogTopic) {
        final MockConsumer<byte[], byte[]> consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.LATEST) {
            @Override
            public synchronized void seekToEnd(final Collection<TopicPartition> partitions) {}

            @Override
            public synchronized void seekToBeginning(final Collection<TopicPartition> partitions) {}

            @Override
            public synchronized long position(final TopicPartition partition) {
                return 0L;
            }
        };

        // For each store ...
        for (final Map.Entry<String, String> storeAndTopic : storeToChangelogTopic.entrySet()) {
            final String topicName = storeAndTopic.getValue();
            // Set up the restore-state topic ...
            // consumer.subscribe(new TopicPartition(topicName, 1));
            // Set up the partition that matches the ID (which is what ProcessorStateManager expects) ...
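            // (End offsets are pinned to 0 below, so this consumer never replays any changelog
            //  records; stores start empty and state restoration completes immediately.)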
            final List<PartitionInfo> partitionInfos = new ArrayList<>();
            partitionInfos.add(new PartitionInfo(topicName, PARTITION_ID, null, null, null));
            consumer.updatePartitions(topicName, partitionInfos);
            consumer.updateEndOffsets(Collections.singletonMap(new TopicPartition(topicName, PARTITION_ID), 0L));
        }
        return consumer;
    }

    private MockConsumer<byte[], byte[]> createGlobalConsumer() {
        return new MockConsumer<byte[], byte[]>(OffsetResetStrategy.LATEST) {
            @Override
            public synchronized void seekToEnd(final Collection<TopicPartition> partitions) {}

            @Override
            public synchronized void seekToBeginning(final Collection<TopicPartition> partitions) {}

            @Override
            public synchronized long position(final TopicPartition partition) {
                return 0L;
            }
        };
    }
}