/*
* Copyright (c) 2016 Red Hat, Inc. and/or its affiliates.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Cheng Fang - Initial API and implementation
*/
package org.jberet.support.io;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.batch.api.BatchProperty;
import javax.batch.api.chunk.ItemReader;
import javax.enterprise.context.Dependent;
import javax.inject.Inject;
import javax.inject.Named;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import org.jberet.support._private.SupportMessages;
/**
 * An implementation of {@code ItemReader} that reads data items from Kafka topics.
 * This reader class keeps track of the current read position, including the current topic name, topic partition
 * number, and topic partition offset. Since the reader manages its own read position, it is recommended to disable
 * Kafka auto commit in the Kafka consumer properties, e.g., {@code enable.auto.commit=false}. Kafka consumer
 * properties are specified in the batch property {@link #configFile}.
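 * <p>
 * For example, the Kafka consumer properties file referenced by the batch property {@link #configFile} might
 * contain entries like the following (values are illustrative only):
 * <pre>
 * bootstrap.servers=localhost:9092
 * key.deserializer=org.apache.kafka.common.serialization.StringDeserializer
 * value.deserializer=org.apache.kafka.common.serialization.StringDeserializer
 * enable.auto.commit=false
 * </pre>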
* <p>
* This reader class supports retry and restart, using the tracked read position as checkpoint info.
* <p>
 * It is also recommended to turn off Kafka consumer automatic group management; instead, manually assign topics
 * and partitions to the consumer. See batch property {@link #topicPartitions}.
*
* @see KafkaItemWriter
* @see KafkaItemReaderWriterBase
*
* @since 1.3.0
*/
@Named
@Dependent
public class KafkaItemReader extends KafkaItemReaderWriterBase implements ItemReader {
/**
     * A list of topic-and-partition pairs in the form "topicName1:partitionNumber1, topicName2:partitionNumber2".
     * For example, "orders:0, orders:1, returns:0, returns:1".
*
* @see org.jberet.support.io.KafkaItemReaderWriterBase#topicPartitionDelimiter
* @see "org.apache.kafka.common.TopicPartition"
*/
@Inject
@BatchProperty
protected List<String> topicPartitions;
/**
     * The time, in milliseconds, spent waiting in poll if data is not available. If 0, the poll operation returns
     * immediately with any records that are currently available. Must not be negative.
*
* @see "org.apache.kafka.clients.consumer.KafkaConsumer#poll(long)"
*/
@Inject
@BatchProperty
protected long pollTimeout;
/**
     * Kafka consumer instance created from the configuration properties specified in {@link #configFile}.
     * It is created in the {@link #open(Serializable)} method, and closed in the {@link #close()} method.
     */
protected KafkaConsumer consumer;
/**
     * Holds records obtained from polling the Kafka server, and feeds them to the {@link #readItem()} method one
     * record at a time. When it is null, or contains no more elements, the {@link #readItem()} method polls the
     * Kafka server to obtain more records.
*/
protected Iterator<ConsumerRecord> recordsBuffer;
/**
     * A mapping of topic-partition to offset that tracks the read progress of each
     * {@code TopicPartition}. The mapping key is of the form {@code "topicName:partitionNumber"}, and the value
     * is the offset, as a {@code Long}, of the latest record read from that {@code TopicPartition}.
* This field serves as this item reader's checkpoint data, and must be serializable.
*/
protected HashMap<String, Long> topicPartitionOffset = new HashMap<String, Long>();
/**
     * When the reader is opened, the Kafka consumer is instantiated, and the {@code checkpoint}, if any, is
     * analyzed to properly position the reader. The Kafka consumer is created from the configuration properties
     * specified in the batch property {@link #configFile}, and is then assigned the topic partitions specified
     * in the batch property {@link #topicPartitions}.
*
* @param checkpoint checkpoint info, null for the first invocation in a new job execution
* @throws Exception if error occurs
*/
@SuppressWarnings("unchecked")
@Override
public void open(final Serializable checkpoint) throws Exception {
consumer = new KafkaConsumer(createConfigProperties());
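        // manually assign topic partitions to the consumer, instead of subscribing to topics, so that
        // Kafka automatic consumer group management is not used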
consumer.assign(createTopicPartitions());
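        // on restart or retry, reposition the consumer based on the saved checkpoint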
if (checkpoint != null) {
final HashMap<String, Long> chkp = (HashMap<String, Long>) checkpoint;
for (final Map.Entry<String, Long> e : chkp.entrySet()) {
final String key = e.getKey();
final String topic;
final int partition;
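                // the checkpoint key has the form "topicName:partitionNumber"; a key without
                // the delimiter denotes partition 0 of that topic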
final int colonPos = key.lastIndexOf(topicPartitionDelimiter);
if (colonPos > 0) {
topic = key.substring(0, colonPos);
partition = Integer.parseInt(key.substring(colonPos + 1));
} else if (colonPos < 0) {
topic = key;
partition = 0;
} else {
throw SupportMessages.MESSAGES.invalidCheckpoint(checkpoint);
}
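                // resume reading from the record immediately after the last record read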
                final long newStartPosition = e.getValue() + 1;
consumer.seek(new TopicPartition(topic, partition), newStartPosition);
}
}
}
/**
     * Returns the reader checkpoint info, which contains the topic name, partition number, and offset of the
     * latest record read from each topic partition read so far.
*
* @return reader checkpoint info as {@code HashMap<String, Long>}
*/
@Override
public Serializable checkpointInfo() {
return topicPartitionOffset;
}
/**
     * Reads one record, updates the current read position, and returns the record's value object.
     * Since a Kafka consumer poll operation retrieves a batch of records, the retrieved records are cached in
     * this reader class to mimic read-one-item-at-a-time behavior. Therefore, the poll operation is invoked only
     * when the local cache is null or contains no more entries. If no more records can be retrieved from the
     * Kafka server, null is returned.
*
* @return the value object of the read record from Kafka server
*
* @throws Exception if error occurs
*/
@SuppressWarnings("unchecked")
@Override
public Object readItem() throws Exception {
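        // refill the local records cache from the Kafka server when it is null or exhausted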
if (recordsBuffer == null || !recordsBuffer.hasNext()) {
            final ConsumerRecords records = consumer.poll(pollTimeout);
if (records == null || records.isEmpty()) {
return null;
}
recordsBuffer = records.iterator();
}
if (recordsBuffer.hasNext()) {
final ConsumerRecord rec = recordsBuffer.next();
if (rec == null) {
return null;
}
final Object val = rec.value();
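            // save the offset of the record just read as checkpoint data for its topic partition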
topicPartitionOffset.put(rec.topic() + topicPartitionDelimiter + rec.partition(), rec.offset());
return val;
}
return null;
}
/**
* Closes the Kafka consumer.
*/
@Override
public void close() {
if (consumer != null) {
consumer.close();
consumer = null;
}
}
/**
     * Creates and returns a list of {@code TopicPartition} objects based on the injected batch property {@link #topicPartitions}.
*
* @return a list of {@code org.apache.kafka.common.TopicPartition}
*/
protected List<TopicPartition> createTopicPartitions() {
final List<TopicPartition> tps = new ArrayList<TopicPartition>();
if (topicPartitions != null) {
for (final String e : topicPartitions) {
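                // the partition number follows the last delimiter; an entry without the delimiter
                // denotes partition 0 of that topic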
final int colonPos = e.lastIndexOf(topicPartitionDelimiter);
if (colonPos > 0) {
tps.add(new TopicPartition(e.substring(0, colonPos), Integer.parseInt(e.substring(colonPos + 1))));
} else if (colonPos < 0) {
tps.add(new TopicPartition(e, 0));
} else {
throw SupportMessages.MESSAGES.invalidReaderWriterProperty(null, topicPartitions.toString(), "topicPartitions");
}
}
} else {
throw SupportMessages.MESSAGES.invalidReaderWriterProperty(null, null, "topicPartitions");
}
return tps;
}
}