package com.linkedin.camus.etl.kafka.common;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Random;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.codehaus.jackson.annotate.JsonIgnoreProperties;
import com.linkedin.camus.coders.MessageEncoder;
import com.linkedin.camus.etl.kafka.CamusJob;
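/**
 * Accumulates per-source event counts for a single topic during a Camus ETL run,
 * tracking first/last timestamps and error counts, and can publish the totals to
 * Kafka as tracking/monitoring events or serialize them to a map for count files.
 */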
@JsonIgnoreProperties({ "trackingCount", "lastKey", "eventCount", "RANDOM" })
public class EtlCounts {
private static final Logger log = Logger.getLogger(EtlCounts.class);
private static final String TOPIC = "topic";
private static final String GRANULARITY = "granularity";
private static final String COUNTS = "counts";
private static final String START_TIME = "startTime";
private static final String END_TIME = "endTime";
private static final String FIRST_TIMESTAMP = "firstTimestamp";
private static final String LAST_TIMESTAMP = "lastTimestamp";
private static final String ERROR_COUNT = "errorCount";
private static final String MONITORING_EVENT_CLASS = "monitoring.event.class";
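// Maximum number of attempts when publishing a single count message to Kafka.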
public static final int NUM_TRIES_PUBLISH_COUNTS = 3;
private String topic;
private long startTime;
private long granularity;
private long errorCount;
private long endTime;
private long lastTimestamp;
private long firstTimestamp = Long.MAX_VALUE;
protected HashMap<String, Source> counts;
private transient EtlKey lastKey;
private transient int eventCount = 0;
private static final Random RANDOM = new Random();
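// Default no-arg constructor (needed, e.g., by Jackson when deserializing count files).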
public EtlCounts() {
}
public EtlCounts(String topic, long granularity, long currentTime) {
this.topic = topic;
this.granularity = granularity;
this.startTime = currentTime;
this.counts = new HashMap<String, Source>();
}
public EtlCounts(String topic, long granularity) {
this(topic, granularity, System.currentTimeMillis());
}
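/**
 * Copy constructor. Note that the counts map is shared with, not copied from,
 * the other instance.
 */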
public EtlCounts(EtlCounts other) {
this(other.topic, other.granularity, other.startTime);
this.counts = other.counts;
}
public HashMap<String, Source> getCounts() {
return counts;
}
public long getEndTime() {
return endTime;
}
public long getErrorCount() {
return errorCount;
}
public long getFirstTimestamp() {
return firstTimestamp;
}
public long getGranularity() {
return granularity;
}
public long getLastTimestamp() {
return lastTimestamp;
}
public long getStartTime() {
return startTime;
}
public String getTopic() {
return topic;
}
public void setCounts(HashMap<String, Source> counts) {
this.counts = counts;
}
public void setEndTime(long endTime) {
this.endTime = endTime;
}
public void setErrorCount(long errorCount) {
this.errorCount = errorCount;
}
public void setFirstTimestamp(long firstTimestamp) {
this.firstTimestamp = firstTimestamp;
}
public void setGranularity(long granularity) {
this.granularity = granularity;
}
public void setLastTimestamp(long lastTimestamp) {
this.lastTimestamp = lastTimestamp;
}
public void setStartTime(long startTime) {
this.startTime = startTime;
}
public void setTopic(String topic) {
this.topic = topic;
}
public int getEventCount() {
return eventCount;
}
public EtlKey getLastKey() {
return lastKey;
}
public void setEventCount(int eventCount) {
this.eventCount = eventCount;
}
public void setLastKey(EtlKey lastKey) {
this.lastKey = lastKey;
}
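/**
 * Records one event for the given key: increments the count for its
 * (server, service, partition) source, updates the first/last timestamps,
 * and remembers the key as the last one seen.
 */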
public void incrementMonitorCount(EtlKey key) {
long monitorPartition = DateUtils.getPartition(granularity, key.getTime());
Source source = new Source(key.getServer(), key.getService(), monitorPartition);
Source countSource = counts.get(source.toString());
if (countSource != null) {
countSource.setCount(countSource.getCount() + 1);
} else {
source.setCount(1);
counts.put(source.toString(), source);
}
if (key.getTime() > lastTimestamp) {
lastTimestamp = key.getTime();
}
if (key.getTime() < firstTimestamp) {
firstTimestamp = key.getTime();
}
lastKey = new EtlKey(key);
eventCount++;
}
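/**
 * Serializes this object's fields into a map and appends it to allCountObject.
 * Note that the fs and path parameters are currently unused.
 */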
public void writeCountsToMap(ArrayList<Map<String, Object>> allCountObject, FileSystem fs, Path path)
throws IOException {
Map<String, Object> countFile = new HashMap<String, Object>();
countFile.put(TOPIC, topic);
countFile.put(GRANULARITY, granularity);
countFile.put(COUNTS, counts);
countFile.put(START_TIME, startTime);
countFile.put(END_TIME, endTime);
countFile.put(FIRST_TIMESTAMP, firstTimestamp);
countFile.put(LAST_TIMESTAMP, lastTimestamp);
countFile.put(ERROR_COUNT, errorCount);
allCountObject.add(countFile);
}
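/**
 * Encodes each accumulated count as a monitoring event, using the encoder class
 * configured via CamusJob.CAMUS_MESSAGE_ENCODER_CLASS and the monitoring event
 * class configured via "monitoring.event.class", then publishes the events to
 * the given broker list in batches.
 */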
@SuppressWarnings("unchecked")
public void postTrackingCountToKafka(Configuration conf, String tier, String brokerList) {
MessageEncoder<IndexedRecord, byte[]> encoder;
AbstractMonitoringEvent monitoringDetails;
try {
encoder =
(MessageEncoder<IndexedRecord, byte[]>) Class.forName(conf.get(CamusJob.CAMUS_MESSAGE_ENCODER_CLASS))
.newInstance();
Properties props = new Properties();
for (Entry<String, String> entry : conf) {
props.put(entry.getKey(), entry.getValue());
}
encoder.init(props, "TrackingMonitoringEvent");
monitoringDetails =
(AbstractMonitoringEvent) Class.forName(getMonitoringEventClass(conf))
.getDeclaredConstructor(Configuration.class).newInstance(conf);
} catch (Exception e1) {
throw new RuntimeException(e1);
}
ArrayList<byte[]> monitorSet = new ArrayList<byte[]>();
int sentCount = 0;
for (Map.Entry<String, Source> singleCount : this.getCounts().entrySet()) {
Source countEntry = singleCount.getValue();
GenericRecord monitoringRecord =
monitoringDetails.createMonitoringEventRecord(countEntry, topic, granularity, tier);
byte[] message = encoder.toBytes((IndexedRecord) monitoringRecord);
monitorSet.add(message);
// Flush in batches of 2000 messages to bound memory usage.
if (monitorSet.size() >= 2000) {
sentCount += monitorSet.size();
produceCount(brokerList, monitorSet);
monitorSet.clear();
}
}
if (monitorSet.size() > 0) {
sentCount += monitorSet.size();
produceCount(brokerList, monitorSet);
}
log.info(topic + " sent " + sentCount + " counts");
}
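/**
 * Returns the name of the AbstractMonitoringEvent implementation configured
 * under "monitoring.event.class".
 */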
protected String getMonitoringEventClass(Configuration conf) {
return conf.get(MONITORING_EVENT_CLASS);
}
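/**
 * Sends a batch of encoded monitoring messages to the "TrackingMonitoringEvent"
 * topic, retrying each message up to NUM_TRIES_PUBLISH_COUNTS times with a
 * randomized backoff between attempts.
 */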
private void produceCount(String brokerList, ArrayList<byte[]> monitorSet) {
// Configure an async producer that requires a single broker acknowledgement.
Properties props = new Properties();
props.put("metadata.broker.list", brokerList);
props.put("producer.type", "async");
props.put("request.required.acks", "1");
props.put("request.timeout.ms", "30000");
log.debug("Broker list: " + brokerList);
Producer<String, byte[]> producer = null;
try {
producer = createProducer(props);
for (byte[] message : monitorSet) {
for (int i = 0; i < NUM_TRIES_PUBLISH_COUNTS; i++) {
try {
KeyedMessage<String, byte[]> keyedMessage = new KeyedMessage<String, byte[]>("TrackingMonitoringEvent", message);
producer.send(keyedMessage);
break;
} catch (Exception e) {
log.error("Publishing count for topic " + topic + " to " + brokerList.toString() + " has failed " + (i + 1)
+ " times. " + (NUM_TRIES_PUBLISH_COUNTS - i - 1) + " more attempts will be made.");
if (i == NUM_TRIES_PUBLISH_COUNTS - 1) {
throw new RuntimeException(e.getMessage() + ": " + "Have retried maximum (" + NUM_TRIES_PUBLISH_COUNTS
+ ") times.");
}
try {
// Randomized, linearly increasing backoff before the next attempt.
Thread.sleep((long) (RANDOM.nextDouble() * (i + 1) * 1000));
} catch (InterruptedException e1) {
// Restore the interrupt flag so callers can observe the interruption.
Thread.currentThread().interrupt();
log.error("Caught interrupted exception between retries of publishing counts to Kafka. "
+ e1.getMessage());
}
}
}
}
} catch (Exception e) {
throw new RuntimeException("failed to publish counts to kafka: " + e.getMessage(), e);
} finally {
if (producer != null) {
producer.close();
}
}
}
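// Factory method; protected so that subclasses (e.g., tests) can substitute their own producer.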
protected Producer<String, byte[]> createProducer(Properties props) {
return new Producer<String, byte[]>(new ProducerConfig(props));
}
}