/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.realtime.impl.kafka;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.joda.time.Duration;
import org.joda.time.Period;
import org.joda.time.format.PeriodFormatter;
import org.joda.time.format.PeriodFormatterBuilder;
import com.linkedin.pinot.common.config.AbstractTableConfig;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.metadata.instance.InstanceZKMetadata;
import com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata;
import com.linkedin.pinot.common.utils.CommonConstants.Helix;
import com.linkedin.pinot.core.realtime.StreamProviderConfig;
import static com.linkedin.pinot.common.utils.EqualityUtils.hashCodeOf;
import static com.linkedin.pinot.common.utils.EqualityUtils.isEqual;
import static com.linkedin.pinot.common.utils.EqualityUtils.isNullOrNotSameClass;
import static com.linkedin.pinot.common.utils.EqualityUtils.isSameReference;
import kafka.consumer.ConsumerConfig;
/**
 * Stream provider configuration for the Kafka high-level consumer.
 * <p>
 * Can be initialized either from a flat property map ({@link #init(Map, Schema)})
 * or from a table config plus instance ZK metadata
 * ({@link #init(AbstractTableConfig, InstanceZKMetadata, Schema)}). Produces the
 * Kafka {@link ConsumerConfig} and the message decoder used by the realtime
 * segment builder.
 * <p>
 * NOTE(review): not thread-safe; expected to be initialized once before use.
 */
public class KafkaHighLevelStreamProviderConfig implements StreamProviderConfig {
  private static final int DEFAULT_MAX_REALTIME_ROWS_COUNT = 5000000;
  private static final long ONE_MINUTE_IN_MILLSEC = 1000 * 60;
  public static final long ONE_HOUR = ONE_MINUTE_IN_MILLSEC * 60;

  // Baseline Kafka high-level consumer settings. Per-table overrides and the
  // mandatory group.id / zookeeper.connect are layered on top in
  // getKafkaConsumerConfig().
  private static final Map<String, String> defaultProps;

  // Parses time specs given in days/hours/minutes, e.g. "1d2h3m", "6h5m" or "5h".
  // PeriodFormatter is thread-safe, so a single shared instance is fine.
  private static final PeriodFormatter PERIOD_FORMATTER;

  static {
    defaultProps = new HashMap<String, String>();
    defaultProps.put("zookeeper.session.timeout.ms", "30000");
    defaultProps.put("zookeeper.connection.timeout.ms", "10000");
    defaultProps.put("zookeeper.sync.time.ms", "2000");
    // Rebalance retries take up to 1 minute to fail (30 retries * 2s backoff).
    defaultProps.put("rebalance.max.retries", "30");
    defaultProps.put("rebalance.backoff.ms", "2000");
    // Offsets are committed explicitly by Pinot, never by the Kafka client.
    defaultProps.put("auto.commit.enable", "false");
    defaultProps.put(Helix.DataSource.Realtime.Kafka.AUTO_OFFSET_RESET, "largest");
    PERIOD_FORMATTER = new PeriodFormatterBuilder()
        .appendDays().appendSuffix("d")
        .appendHours().appendSuffix("h")
        .appendMinutes().appendSuffix("m")
        .toFormatter();
  }

  public static int getDefaultMaxRealtimeRowsCount() {
    return DEFAULT_MAX_REALTIME_ROWS_COUNT;
  }

  private String kafkaTopicName;
  private String zkString;
  private String groupId;
  private KafkaMessageDecoder decoder;
  private String decodeKlass;
  private Schema indexingSchema;
  private Map<String, String> decoderProps;
  private Map<String, String> kafkaConsumerProps;
  // Segment flush thresholds, overridable via REALTIME_SEGMENT_FLUSH_TIME/SIZE.
  private long segmentTimeInMillis = ONE_HOUR;
  private int realtimeRecordsThreshold = DEFAULT_MAX_REALTIME_ROWS_COUNT;

  public KafkaHighLevelStreamProviderConfig() {
  }

  /**
   * Initializes this config from a flat property map.
   * <p>
   * Recognized keys:
   * <ul>
   *   <li>kafka.hlc.zk.connect.string : comma separated list of ZK hosts</li>
   *   <li>kafka.hlc.group.id : consumer group id</li>
   *   <li>kafka topic name key : topic to consume</li>
   *   <li>kafka.decoder.class.name : fully qualified decoder class name</li>
   *   <li>every property prefixed with the decoder/consumer prefixes is
   *       forwarded to the decoder / Kafka consumer respectively</li>
   * </ul>
   *
   * @param properties flat key/value configuration
   * @param schema indexing schema for the realtime table
   * @throws RuntimeException if group id, ZK string, topic name or decoder class is missing,
   *         or if a flush threshold value cannot be parsed
   */
  @Override
  public void init(Map<String, String> properties, Schema schema) {
    decoderProps = new HashMap<String, String>();
    kafkaConsumerProps = new HashMap<>();
    this.indexingSchema = schema;
    if (properties.containsKey(Helix.DataSource.Realtime.Kafka.HighLevelConsumer.GROUP_ID)) {
      this.groupId = properties.get(Helix.DataSource.Realtime.Kafka.HighLevelConsumer.GROUP_ID);
    }
    if (properties.containsKey(Helix.DataSource.Realtime.Kafka.HighLevelConsumer.ZK_CONNECTION_STRING)) {
      this.zkString = properties.get(Helix.DataSource.Realtime.Kafka.HighLevelConsumer.ZK_CONNECTION_STRING);
    }
    if (properties.containsKey(Helix.DataSource.Realtime.Kafka.TOPIC_NAME)) {
      this.kafkaTopicName = properties.get(Helix.DataSource.Realtime.Kafka.TOPIC_NAME);
    }
    if (properties.containsKey(Helix.DataSource.Realtime.Kafka.DECODER_CLASS)) {
      this.decodeKlass = properties.get(Helix.DataSource.Realtime.Kafka.DECODER_CLASS);
    }
    if (groupId == null || zkString == null || kafkaTopicName == null || this.decodeKlass == null) {
      throw new RuntimeException("Cannot initialize KafkaHighLevelStreamProviderConfig as: " + "groupId = " + groupId
          + ", zkString = " + zkString + ", kafkaTopicName = " + kafkaTopicName + ", decodeKlass = " + decodeKlass);
    }
    if (properties.containsKey(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_SIZE)) {
      realtimeRecordsThreshold =
          Integer.parseInt(properties.get(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_SIZE));
    }
    if (properties.containsKey(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_TIME)) {
      // Use convertToMs (not bare Long.parseLong) so this path accepts the same
      // "6h30m"-style time specs as the table-config init; plain millisecond
      // values still parse exactly as before.
      segmentTimeInMillis =
          convertToMs(properties.get(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_TIME));
    }
    // Route prefixed properties to the decoder and the Kafka consumer.
    for (Map.Entry<String, String> entry : properties.entrySet()) {
      String key = entry.getKey();
      if (key.startsWith(Helix.DataSource.Realtime.Kafka.DECODER_PROPS_PREFIX)) {
        decoderProps.put(Helix.DataSource.Realtime.Kafka.getDecoderPropertyKey(key), entry.getValue());
      }
      if (key.startsWith(Helix.DataSource.Realtime.Kafka.KAFKA_CONSUMER_PROPS_PREFIX)) {
        kafkaConsumerProps.put(Helix.DataSource.Realtime.Kafka.getConsumerPropertyKey(key), entry.getValue());
      }
    }
  }

  @Override
  public Schema getSchema() {
    return indexingSchema;
  }

  public String getTopicName() {
    return this.kafkaTopicName;
  }

  /**
   * Returns the topic-to-thread-count map expected by the Kafka high-level consumer API.
   *
   * @param numThreads number of consumer threads for this topic
   */
  public Map<String, Integer> getTopicMap(int numThreads) {
    Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
    topicCountMap.put(kafkaTopicName, numThreads);
    return topicCountMap;
  }

  /**
   * Builds the Kafka {@link ConsumerConfig}: defaults first, then per-table
   * overrides, then the mandatory group id and ZK connect string.
   */
  public ConsumerConfig getKafkaConsumerConfig() {
    Properties props = new Properties();
    props.putAll(defaultProps);
    props.putAll(kafkaConsumerProps);
    props.put("group.id", groupId);
    props.put("zookeeper.connect", zkString);
    return new ConsumerConfig(props);
  }

  /**
   * Instantiates and initializes the configured message decoder.
   *
   * @throws Exception if the class cannot be loaded/constructed or its init fails
   */
  public KafkaMessageDecoder getDecoder() throws Exception {
    // getDeclaredConstructor().newInstance() instead of Class.newInstance():
    // the latter propagates checked constructor exceptions undeclared and is
    // deprecated in newer JDKs.
    KafkaMessageDecoder ret =
        (KafkaMessageDecoder) Class.forName(decodeKlass).getDeclaredConstructor().newInstance();
    ret.init(decoderProps, indexingSchema, kafkaTopicName);
    return ret;
  }

  @Override
  public String getStreamProviderClass() {
    return null;
  }

  /**
   * Initializes this config from the realtime table config and instance ZK metadata.
   *
   * @param tableConfig realtime table config carrying the Kafka stream configs
   * @param instanceMetadata per-instance metadata; null for low-level (LL) segments
   * @param schema indexing schema for the realtime table
   */
  @Override
  public void init(AbstractTableConfig tableConfig, InstanceZKMetadata instanceMetadata, Schema schema) {
    this.indexingSchema = schema;
    if (instanceMetadata != null) {
      // For LL segments, instanceZkMetadata will be null.
      this.groupId = instanceMetadata.getGroupId(tableConfig.getTableName());
    }
    Map<String, String> streamConfigs = tableConfig.getIndexingConfig().getStreamConfigs();
    KafkaStreamMetadata kafkaMetadata = new KafkaStreamMetadata(streamConfigs);
    this.kafkaTopicName = kafkaMetadata.getKafkaTopicName();
    this.decodeKlass = kafkaMetadata.getDecoderClass();
    this.decoderProps = kafkaMetadata.getDecoderProperties();
    this.kafkaConsumerProps = kafkaMetadata.getKafkaConsumerProperties();
    this.zkString = kafkaMetadata.getZkBrokerUrl();
    if (streamConfigs.containsKey(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_SIZE)) {
      realtimeRecordsThreshold =
          Integer.parseInt(streamConfigs.get(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_SIZE));
    }
    if (streamConfigs.containsKey(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_TIME)) {
      segmentTimeInMillis =
          convertToMs(streamConfigs.get(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_TIME));
    }
  }

  @Override
  public String getStreamName() {
    return getTopicName();
  }

  @Override
  public int getSizeThresholdToFlushSegment() {
    return realtimeRecordsThreshold;
  }

  @Override
  public long getTimeThresholdToFlushSegment() {
    return segmentTimeInMillis;
  }

  String getGroupId() {
    return groupId;
  }

  String getZkString() {
    return zkString;
  }

  /**
   * Converts a time spec to milliseconds. Accepts either a plain number of
   * milliseconds (e.g. "3600000") or a day/hour/minute spec such as "1d2h3m".
   *
   * @param timeStr the time spec to parse
   * @return the duration in milliseconds
   * @throws RuntimeException if the string matches neither form
   */
  protected long convertToMs(String timeStr) {
    try {
      return Long.parseLong(timeStr);
    } catch (NumberFormatException e1) {
      try {
        Period p = PERIOD_FORMATTER.parsePeriod(timeStr);
        // d/h/m periods are whole minutes, so getMillis() loses no precision
        // relative to the former getStandardSeconds() * 1000.
        return p.toStandardDuration().getMillis();
      } catch (Exception e2) {
        throw new RuntimeException("Invalid time spec '" + timeStr + "' (Valid examples: '3h', '4h30m')", e2);
      }
    }
  }

  @Override
  public boolean equals(Object o) {
    if (isSameReference(this, o)) {
      return true;
    }
    if (isNullOrNotSameClass(this, o)) {
      return false;
    }
    KafkaHighLevelStreamProviderConfig that = (KafkaHighLevelStreamProviderConfig) o;
    return isEqual(segmentTimeInMillis, that.segmentTimeInMillis) &&
        isEqual(realtimeRecordsThreshold, that.realtimeRecordsThreshold) &&
        isEqual(kafkaTopicName, that.kafkaTopicName) &&
        isEqual(zkString, that.zkString) &&
        isEqual(groupId, that.groupId) &&
        isEqual(decoder, that.decoder) &&
        isEqual(decodeKlass, that.decodeKlass) &&
        isEqual(indexingSchema, that.indexingSchema) &&
        isEqual(decoderProps, that.decoderProps) &&
        isEqual(kafkaConsumerProps, that.kafkaConsumerProps);
  }

  @Override
  public int hashCode() {
    int result = hashCodeOf(kafkaTopicName);
    result = hashCodeOf(result, zkString);
    result = hashCodeOf(result, groupId);
    result = hashCodeOf(result, decoder);
    result = hashCodeOf(result, decodeKlass);
    result = hashCodeOf(result, indexingSchema);
    result = hashCodeOf(result, decoderProps);
    result = hashCodeOf(result, kafkaConsumerProps);
    result = hashCodeOf(result, segmentTimeInMillis);
    result = hashCodeOf(result, realtimeRecordsThreshold);
    return result;
  }
}