/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.connectors.kafka.internal;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext;
import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartitionState;
import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService;
import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema;
import org.apache.flink.util.SerializedValue;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
/**
 * A fetcher that fetches data from Kafka brokers via the Kafka 0.9 consumer API.
 *
 * <p>The Kafka consumer itself is driven by a separate {@link KafkaConsumerThread}. Record
 * batches, as well as exceptions, are passed from that thread to {@link #runFetchLoop()}
 * through a {@link Handover}.
 *
 * @param <T> The type of elements produced by the fetcher.
 */
public class Kafka09Fetcher<T> extends AbstractFetcher<T, TopicPartition> {

    private static final Logger LOG = LoggerFactory.getLogger(Kafka09Fetcher.class);

    // ------------------------------------------------------------------------

    /** Schema used to turn Kafka's raw key/value bytes into Flink objects. */
    private final KeyedDeserializationSchema<T> deserializer;

    /** Exchange point for data and exceptions between the consumer thread and the task thread. */
    private final Handover handover;

    /** Thread that runs the actual KafkaConsumer and hands record batches to this fetcher. */
    private final KafkaConsumerThread consumerThread;

    /** Set to {@code false} to make the main work loop terminate. */
    private volatile boolean running = true;

    // ------------------------------------------------------------------------

    /**
     * Creates the fetcher and prepares the consumer thread. The thread is only created here;
     * it is started by {@link #runFetchLoop()}.
     *
     * <p>The first eight arguments (up to and including {@code useMetrics}, skipping
     * {@code taskNameWithSubtasks}, {@code metricGroup}, {@code deserializer},
     * {@code kafkaProperties} and {@code pollTimeout}) are forwarded unchanged to the
     * {@link AbstractFetcher} constructor.
     *
     * @param sourceContext forwarded to the super constructor
     * @param assignedPartitionsWithInitialOffsets forwarded to the super constructor
     * @param watermarksPeriodic forwarded to the super constructor
     * @param watermarksPunctuated forwarded to the super constructor
     * @param processingTimeProvider forwarded to the super constructor
     * @param autoWatermarkInterval forwarded to the super constructor
     * @param userCodeClassLoader forwarded to the super constructor
     * @param taskNameWithSubtasks appended to the fetcher name to form the consumer thread's name
     * @param metricGroup parent group under which the "KafkaConsumer" group is added
     * @param deserializer schema applied to every fetched record
     * @param kafkaProperties handed to the consumer thread
     * @param pollTimeout handed to the consumer thread
     * @param useMetrics forwarded to the super constructor and to the consumer thread
     * @throws Exception declared by the super constructor / the set-up calls made here
     */
    public Kafka09Fetcher(
            SourceContext<T> sourceContext,
            Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
            SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
            SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
            ProcessingTimeService processingTimeProvider,
            long autoWatermarkInterval,
            ClassLoader userCodeClassLoader,
            String taskNameWithSubtasks,
            MetricGroup metricGroup,
            KeyedDeserializationSchema<T> deserializer,
            Properties kafkaProperties,
            long pollTimeout,
            boolean useMetrics) throws Exception
    {
        super(
                sourceContext,
                assignedPartitionsWithInitialOffsets,
                watermarksPeriodic,
                watermarksPunctuated,
                processingTimeProvider,
                autoWatermarkInterval,
                userCodeClassLoader,
                useMetrics);

        this.deserializer = deserializer;
        this.handover = new Handover();

        // all Kafka related metrics of this fetcher live in their own sub group
        final MetricGroup consumerMetrics = metricGroup.addGroup("KafkaConsumer");
        addOffsetStateGauge(consumerMetrics);

        // createCallBridge() and getFetcherName() are hooks that subclasses may override
        this.consumerThread = new KafkaConsumerThread(
                LOG,
                handover,
                kafkaProperties,
                subscribedPartitionStates(),
                consumerMetrics,
                createCallBridge(),
                getFetcherName() + " for " + taskNameWithSubtasks,
                pollTimeout,
                useMetrics);
    }

    // ------------------------------------------------------------------------
    //  Fetcher work methods
    // ------------------------------------------------------------------------

    /**
     * Starts the consumer thread and then keeps pulling record batches from the handover,
     * deserializing and emitting them, until {@code running} is cleared (by {@link #cancel()}
     * or by an end-of-stream element) or an exception is thrown.
     *
     * <p>The consumer thread is always told to shut down when the loop is left. Only when the
     * loop ends without an exception does this method additionally wait for that thread.
     *
     * @throws Exception whatever the handover, the deserializer or the emit path throws
     */
    @Override
    public void runFetchLoop() throws Exception {
        try {
            final Handover batchSource = this.handover;

            // from here on the Kafka consumer is actually running
            consumerThread.start();

            while (running) {
                // blocks until the next batch is available; exceptions encountered in the
                // fetcher thread are automatically re-thrown by this call
                final ConsumerRecords<byte[], byte[]> batch = batchSource.pollNext();

                // split the batch up by subscribed partition
                for (KafkaTopicPartitionState<TopicPartition> partitionState : subscribedPartitionStates()) {
                    final List<ConsumerRecord<byte[], byte[]>> recordsOfPartition =
                            batch.records(partitionState.getKafkaPartitionHandle());

                    emitRecordsOfPartition(recordsOfPartition, partitionState);
                }
            }
        }
        finally {
            // tell the consumer thread that there is nothing left to do
            consumerThread.shutdown();
        }

        // reached only on a clean exit: wait for the runner thread to finish
        try {
            consumerThread.join();
        }
        catch (InterruptedException e) {
            // may be the result of a wake-up interruption after an exception.
            // nothing to handle here, just restore the interruption state
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Deserializes and emits the given records of one partition, in order.
     *
     * <p>When the deserializer reports an element as end-of-stream, {@code running} is cleared
     * and the rest of this partition's records is skipped; the end-of-stream element itself is
     * not emitted.
     *
     * <p>NOTE(review): only this partition's remaining records are skipped. The caller goes on
     * with the other partitions of the same batch before the main loop notices the cleared
     * flag - confirm that this is the intended end-of-stream behavior.
     *
     * @param recordsOfPartition records of the current batch that belong to the partition
     * @param partitionState state object of the partition the records were read from
     * @throws Exception whatever the deserializer or the emit path throws
     */
    private void emitRecordsOfPartition(
            List<ConsumerRecord<byte[], byte[]>> recordsOfPartition,
            KafkaTopicPartitionState<TopicPartition> partitionState) throws Exception {

        for (ConsumerRecord<byte[], byte[]> kafkaRecord : recordsOfPartition) {
            final T element = deserializer.deserialize(
                    kafkaRecord.key(), kafkaRecord.value(),
                    kafkaRecord.topic(), kafkaRecord.partition(), kafkaRecord.offset());

            if (deserializer.isEndOfStream(element)) {
                // end of stream signaled
                running = false;
                return;
            }

            // emit the actual record. this also updates offset state atomically
            // and deals with timestamps and watermark generation
            emitRecord(element, partitionState, kafkaRecord.offset(), kafkaRecord);
        }
    }

    /**
     * Stops the fetcher: clears the {@code running} flag, closes the handover and shuts the
     * consumer thread down.
     */
    @Override
    public void cancel() {
        // make the main thread leave its loop. A thread interrupt will come anyways.
        running = false;
        handover.close();
        consumerThread.shutdown();
    }

    // ------------------------------------------------------------------------
    //  The below methods are overridden in the 0.10 fetcher, which otherwise
    //  reuses most of the 0.9 fetcher behavior
    // ------------------------------------------------------------------------

    /**
     * Emits one deserialized record. This variant additionally receives the originating Kafka
     * record, which the 0.9 fetcher ignores.
     *
     * @param record the deserialized element
     * @param partition state of the partition the element was read from
     * @param offset Kafka offset of the element
     * @param consumerRecord the Kafka record the element was deserialized from (unused here)
     * @throws Exception if emitting the record fails
     */
    protected void emitRecord(
            T record,
            KafkaTopicPartitionState<TopicPartition> partition,
            long offset,
            @SuppressWarnings("UnusedParameters") ConsumerRecord<?, ?> consumerRecord) throws Exception {

        // no timestamp extraction in the 0.9 fetcher: delegate to the three-argument variant
        emitRecord(record, partition, offset);
    }

    /**
     * Gets the name of this fetcher, for thread naming and logging purposes.
     *
     * @return the constant name of the 0.9 fetcher
     */
    protected String getFetcherName() {
        return "Kafka 0.9 Fetcher";
    }

    /**
     * Creates the call bridge that is handed to the consumer thread.
     *
     * @return a new {@code KafkaConsumerCallBridge}
     */
    protected KafkaConsumerCallBridge createCallBridge() {
        return new KafkaConsumerCallBridge();
    }

    // ------------------------------------------------------------------------
    //  Implement Methods of the AbstractFetcher
    // ------------------------------------------------------------------------

    /**
     * Converts Flink's partition descriptor into Kafka's {@link TopicPartition}.
     *
     * @param partition the Flink-side partition descriptor
     * @return a Kafka partition handle with the same topic and partition number
     */
    @Override
    public TopicPartition createKafkaPartitionHandle(KafkaTopicPartition partition) {
        return new TopicPartition(partition.getTopic(), partition.getPartition());
    }

    /**
     * Hands the given offsets to the consumer thread for committing to Kafka.
     *
     * <p>Only subscribed partitions that have an entry in {@code offsets} are considered. For
     * each of them the value recorded as committed, and passed on, is the given offset plus one.
     *
     * @param offsets last processed offset per partition
     * @throws Exception declared by the overridden method
     */
    @Override
    public void commitInternalOffsetsToKafka(Map<KafkaTopicPartition, Long> offsets) throws Exception {
        final KafkaTopicPartitionState<TopicPartition>[] subscribed = subscribedPartitionStates();
        final Map<TopicPartition, OffsetAndMetadata> pendingCommits = new HashMap<>(subscribed.length);

        for (KafkaTopicPartitionState<TopicPartition> state : subscribed) {
            final Long lastProcessed = offsets.get(state.getKafkaTopicPartition());
            if (lastProcessed == null) {
                // nothing to commit for this partition
                continue;
            }

            // offsets committed through the KafkaConsumer have to be 1 more than the last
            // processed offset. Flink's checkpoints/saved state are not affected by this.
            final long nextOffset = lastProcessed + 1;
            pendingCommits.put(state.getKafkaPartitionHandle(), new OffsetAndMetadata(nextOffset));
            state.setCommittedOffset(nextOffset);
        }

        // record the work to be committed by the main consumer thread and make sure the consumer notices that
        consumerThread.setOffsetsToCommit(pendingCommits);
    }
}