/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.pinterest.secor.reader;
import com.pinterest.secor.common.OffsetTracker;
import com.pinterest.secor.common.SecorConfig;
import com.pinterest.secor.common.TopicPartition;
import com.pinterest.secor.message.Message;
import com.pinterest.secor.util.IdUtil;
import com.pinterest.secor.util.RateLimitUtil;
import com.pinterest.secor.util.StatsUtil;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.consumer.TopicFilter;
import kafka.consumer.Whitelist;
import kafka.consumer.Blacklist;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.UnknownHostException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
/**
* Message reader that consumes raw Kafka messages.
*
* @author Pawel Garbacki (pawel@pinterest.com)
*/
public class MessageReader {
    private static final Logger LOG = LoggerFactory.getLogger(MessageReader.class);

    protected SecorConfig mConfig;
    protected OffsetTracker mOffsetTracker;
    protected ConsumerConnector mConsumerConnector;
    protected ConsumerIterator mIterator;
    // Last time (epoch seconds) a message was read from each topic partition.
    protected HashMap<TopicPartition, Long> mLastAccessTime;
    // Partitions idle for longer than this many seconds are dropped from mLastAccessTime.
    protected final int mTopicPartitionForgetSeconds;
    // Number of messages read between rate-limiter acquisitions; always >= 1.
    protected final int mCheckMessagesPerSecond;
    // Messages read since the last rate-limiter acquisition, in [0, mCheckMessagesPerSecond).
    protected int mNMessages;

    /**
     * Creates a reader bound to a single Kafka stream selected by the configured topic
     * blacklist or topic filter.
     *
     * @param config        source of Kafka, ZooKeeper and rate-limiting settings
     * @param offsetTracker consulted in {@link #read()} to skip already committed messages
     * @throws UnknownHostException declared by {@link #createConsumerConfig()}
     * @throws RuntimeException     if both a topic blacklist and a topic filter are configured
     */
    public MessageReader(SecorConfig config, OffsetTracker offsetTracker) throws
            UnknownHostException {
        mConfig = config;
        mOffsetTracker = offsetTracker;

        // Reject an inconsistent configuration before the consumer connector is created,
        // so that a failure here does not leave an open connector behind.
        String topicBlacklist = mConfig.getKafkaTopicBlacklist();
        boolean useBlacklist = !topicBlacklist.isEmpty();
        if (useBlacklist && !mConfig.getKafkaTopicFilter().isEmpty()) {
            throw new RuntimeException("Topic filter and blacklist cannot be both specified.");
        }

        mTopicPartitionForgetSeconds = mConfig.getTopicPartitionForgetSeconds();
        // Integer division truncates to 0 when there are more consumer threads than
        // messages per second; read() uses this value as a modulus, so clamp it to 1.
        mCheckMessagesPerSecond = Math.max(1,
                mConfig.getMessagesPerSecond() / mConfig.getConsumerThreads());

        mConsumerConnector = Consumer.createJavaConsumerConnector(createConsumerConfig());

        TopicFilter topicFilter;
        if (useBlacklist) {
            topicFilter = new Blacklist(topicBlacklist);
        } else {
            topicFilter = new Whitelist(mConfig.getKafkaTopicFilter());
        }
        LOG.debug("Use TopicFilter {}({})", topicFilter.getClass(), topicFilter);

        List<KafkaStream<byte[], byte[]>> streams =
                mConsumerConnector.createMessageStreamsByFilter(topicFilter);
        // Only the first stream is consumed by this reader.
        KafkaStream<byte[], byte[]> stream = streams.get(0);
        mIterator = stream.iterator();

        mLastAccessTime = new HashMap<TopicPartition, Long>();
        StatsUtil.setLabel("secor.kafka.consumer.id", IdUtil.getConsumerId());
    }

    /**
     * Records the current time as the last access time of the given partition and forgets
     * every partition that has not been accessed for more than
     * {@code mTopicPartitionForgetSeconds} seconds.
     *
     * @param topicPartition partition the latest message was read from
     */
    private void updateAccessTime(TopicPartition topicPartition) {
        long now = System.currentTimeMillis() / 1000L;
        mLastAccessTime.put(topicPartition, now);
        Iterator<Map.Entry<TopicPartition, Long>> entries =
                mLastAccessTime.entrySet().iterator();
        while (entries.hasNext()) {
            long lastAccessTime = entries.next().getValue();
            if (now - lastAccessTime > mTopicPartitionForgetSeconds) {
                // Removing through the iterator keeps the traversal valid.
                entries.remove();
            }
        }
    }

    /**
     * Publishes the currently tracked topic partitions as a space-separated list of
     * {@code topic/partition} entries under the {@code secor.topic_partitions} label.
     */
    private void exportStats() {
        StringBuilder topicPartitions = new StringBuilder();
        for (TopicPartition topicPartition : mLastAccessTime.keySet()) {
            if (topicPartitions.length() > 0) {
                topicPartitions.append(' ');
            }
            topicPartitions.append(topicPartition.getTopic())
                    .append('/')
                    .append(topicPartition.getPartition());
        }
        StatsUtil.setLabel("secor.topic_partitions", topicPartitions.toString());
    }

    /**
     * Builds the Kafka consumer configuration from {@code mConfig}. Auto-commit is always
     * disabled; the optional tuning properties are only set when configured to a
     * non-empty value.
     *
     * @return the consumer configuration
     * @throws UnknownHostException declared for callers; presumably raised while deriving
     *                              the consumer id - TODO confirm against IdUtil
     */
    private ConsumerConfig createConsumerConfig() throws UnknownHostException {
        Properties props = new Properties();
        props.put("zookeeper.connect",
                mConfig.getZookeeperQuorum() + mConfig.getKafkaZookeeperPath());
        props.put("group.id", mConfig.getKafkaGroup());
        props.put("zookeeper.session.timeout.ms",
                Integer.toString(mConfig.getZookeeperSessionTimeoutMs()));
        props.put("zookeeper.sync.time.ms", Integer.toString(mConfig.getZookeeperSyncTimeMs()));
        props.put("auto.commit.enable", "false");
        props.put("auto.offset.reset", mConfig.getConsumerAutoOffsetReset());
        props.put("consumer.timeout.ms", Integer.toString(mConfig.getConsumerTimeoutMs()));
        props.put("consumer.id", IdUtil.getConsumerId());
        // Properties required to upgrade from kafka 0.8.x to 0.9.x
        props.put("dual.commit.enabled", mConfig.getDualCommitEnabled());
        props.put("offsets.storage", mConfig.getOffsetsStorage());
        props.put("partition.assignment.strategy", mConfig.getPartitionAssignmentStrategy());

        putIfNotEmpty(props, "rebalance.max.retries", mConfig.getRebalanceMaxRetries());
        putIfNotEmpty(props, "rebalance.backoff.ms", mConfig.getRebalanceBackoffMs());
        putIfNotEmpty(props, "socket.receive.buffer.bytes",
                mConfig.getSocketReceiveBufferBytes());
        putIfNotEmpty(props, "fetch.message.max.bytes", mConfig.getFetchMessageMaxBytes());
        putIfNotEmpty(props, "fetch.min.bytes", mConfig.getFetchMinBytes());
        putIfNotEmpty(props, "fetch.wait.max.ms", mConfig.getFetchWaitMaxMs());

        return new ConsumerConfig(props);
    }

    /** Sets {@code key} in {@code props} only when {@code value} is neither null nor empty. */
    private static void putIfNotEmpty(Properties props, String key, String value) {
        if (value != null && !value.isEmpty()) {
            props.put(key, value);
        }
    }

    /**
     * Reports whether another message is available from the underlying Kafka iterator.
     *
     * @return the result of the iterator's {@code hasNext()}
     */
    public boolean hasNext() {
        return mIterator.hasNext();
    }

    /**
     * Reads the next message from Kafka. Once every {@code mCheckMessagesPerSecond}
     * messages the rate limiter is asked for that many permits and the tracked topic
     * partitions are exported as a stat.
     *
     * @return the message, or {@code null} when its offset precedes the committed offset
     *         count of its topic partition
     */
    public Message read() {
        assert hasNext();
        mNMessages = (mNMessages + 1) % mCheckMessagesPerSecond;
        // mNMessages is already reduced modulo mCheckMessagesPerSecond, so zero marks the
        // end of a batch.
        if (mNMessages == 0) {
            RateLimitUtil.acquire(mCheckMessagesPerSecond);
        }
        MessageAndMetadata<byte[], byte[]> kafkaMessage = mIterator.next();
        Message message = new Message(kafkaMessage.topic(), kafkaMessage.partition(),
                kafkaMessage.offset(), kafkaMessage.key(),
                kafkaMessage.message());
        TopicPartition topicPartition = new TopicPartition(message.getTopic(),
                message.getKafkaPartition());
        updateAccessTime(topicPartition);
        // Skip already committed messages.
        long committedOffsetCount = mOffsetTracker.getTrueCommittedOffsetCount(topicPartition);
        LOG.debug("read message {}", message);
        if (mNMessages == 0) {
            exportStats();
        }
        if (message.getOffset() < committedOffsetCount) {
            LOG.debug("skipping message {} because its offset precedes committed offset count {}",
                    message, committedOffsetCount);
            return null;
        }
        return message;
    }
}