package com.neverwinterdp.scribengin.scribeconsumer;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URI;
import java.nio.ByteBuffer;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.cluster.Broker;
import kafka.common.ErrorMapping;
import kafka.common.TopicAndPartition;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.OffsetRequest;
import kafka.javaapi.OffsetResponse;
import kafka.javaapi.PartitionMetadata;
import kafka.javaapi.TopicMetadata;
import kafka.javaapi.TopicMetadataRequest;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.message.MessageAndOffset;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.ParameterException;
import com.neverwinterdp.scribengin.commitlog.AbstractScribeCommitLogFactory;
import com.neverwinterdp.scribengin.commitlog.ScribeCommitLog;
import com.neverwinterdp.scribengin.commitlog.ScribeCommitLogFactory;
import com.neverwinterdp.scribengin.commitlog.ScribeLogEntry;
import com.neverwinterdp.scribengin.filesystem.AbstractFileSystemFactory;
import com.neverwinterdp.scribengin.filesystem.FileSystemFactory;
import com.neverwinterdp.scribengin.filesystem.HDFSFileSystemFactory;
import com.neverwinterdp.scribengin.hostport.CustomConvertFactory;
import com.neverwinterdp.scribengin.hostport.HostPort;
import com.neverwinterdp.scribengin.partitioner.AbstractPartitioner;
import com.neverwinterdp.scribengin.partitioner.DatePartitioner;
import com.neverwinterdp.scribengin.partitioner.DumbPartitioner;
import com.neverwinterdp.scribengin.utilities.LostLeadershipException;
import com.neverwinterdp.scribengin.utilities.StringRecordWriter;
/**
 * Consumes one Kafka topic/partition via the SimpleConsumer API and writes the
 * messages to a file system (local or HDFS), recording progress in a
 * write-ahead commit log so uncommitted data can be recovered or discarded
 * after a crash. A consumer instance is uniquely identified by
 * client name + topic name + partition.
 *
 * Example invocation:
 *   java -cp scribengin-1.0-SNAPSHOT.jar com.neverwinterdp.scribengin.ScribeConsumer
 *     --topic scribe --broker_list HOST1:PORT,HOST2:PORT2 --checkpoint_interval 100 --partition 0
 *
 * Related reading:
 *   org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
 *   EtlMultiOutputCommitter in Camus
 * Handy for manual testing:
 *   /usr/lib/kafka/bin/kafka-console-producer.sh --topic scribe --broker-list 10.0.2.15:9092
 */
public class ScribeConsumer {
//Variables set by constructor method/command line parameters
private String PRE_COMMIT_PATH_PREFIX; // directory holding in-flight (tmp) data files and the commit log
private String COMMIT_PATH_PREFIX; // directory where committed data files end up
private String topic; // Kafka topic to consume
private int partition; // Kafka partition to consume
private List<HostPort> brokerList; // list of (host:port)s
private long commitCheckPointInterval; // ms
private String hdfsPath = null; // when non-null, every path is prefixed with this HDFS URI
//private String libhadoopPath = "/usr/lib/hadoop/lib/native/libhadoop.so";
//Set by cleanStart() method
private boolean cleanStart = false; // when true, restart from the earliest Kafka offset and wipe prior state
//Instantiated at time of object instantiation
private Timer checkPointIntervalTimer; // drives periodic commit()
private Timer partitionerUpdateTimer; // triggers commit() when the partitioner's refresh period elapses
private List<HostPort> replicaBrokers; // list of (host:port)s
//Private class variables
private AbstractPartitioner partitioner = null; // chooses the sub-directory committed data lands in
private String currTmpDataPath; // current in-flight data file
private String currDataPath; // destination path for the current data file once committed
private AbstractScribeCommitLogFactory scribeCommitLogFactory;
private AbstractFileSystemFactory fileSystemFactory;
private long lastCommittedOffset;
private long offset; // offset is on a per line basis. starts on the last valid offset
private Thread serverThread; // runs runServerLoop()
private SimpleConsumer consumer; // connection to the current partition leader
private static final Logger LOG = Logger.getLogger(ScribeConsumer.class.getName());
/**
 * Creates an unconfigured consumer. Timers and the replica-broker list are
 * created here so the other constructors can delegate to this one.
 */
public ScribeConsumer() {
checkPointIntervalTimer = new Timer();
partitionerUpdateTimer = new Timer();
replicaBrokers = new ArrayList<HostPort>();
}
/**
 * Creates a consumer from a parsed configuration object. When the config
 * defines a date_partitioner pattern a DatePartitioner is installed;
 * otherwise init() later falls back to DumbPartitioner.
 *
 * @param c configuration holding paths, topic/partition, broker list,
 *          checkpoint interval and clean-start flag
 */
public ScribeConsumer(ScribeConsumerConfig c) {
this();
this.PRE_COMMIT_PATH_PREFIX = c.PRE_COMMIT_PATH_PREFIX;
this.COMMIT_PATH_PREFIX = c.COMMIT_PATH_PREFIX;
this.topic = c.topic;
this.partition = c.partition;
this.brokerList = c.brokerList;
this.commitCheckPointInterval = c.commitCheckPointInterval;
this.cleanStart = c.cleanStart;
this.hdfsPath = c.hdfsPath;
if (c.date_partitioner != null) {
this.setPartitioner(new DatePartitioner(c.date_partitioner));
}
//this.libhadoopPath = c.libHadoopPath;
}
/**
 * Creates a consumer with explicit settings and default (non-clean-start,
 * local file system) behavior.
 *
 * @param preCommitPathPrefix directory for tmp data files and the commit log
 * @param commitPathPrefix directory committed data files are moved into
 * @param topic Kafka topic to consume
 * @param partition Kafka partition to consume
 * @param brokerList seed brokers used for leader discovery
 * @param commitCheckPointInterval commit period in milliseconds
 */
public ScribeConsumer(String preCommitPathPrefix, String commitPathPrefix, String topic,
int partition, List<HostPort> brokerList, long commitCheckPointInterval) {
this();
this.PRE_COMMIT_PATH_PREFIX = preCommitPathPrefix;
this.COMMIT_PATH_PREFIX = commitPathPrefix;
this.topic = topic;
this.partition = partition;
this.brokerList = brokerList;
this.commitCheckPointInterval = commitCheckPointInterval;
}
/**
 * Same as the six-argument constructor, additionally setting clean-start mode
 * and an optional HDFS URI prefix for all paths.
 *
 * @param cleanStart when true, consumption restarts from the earliest offset
 * @param hdfsPath HDFS URI prefix, or null to use the local file system
 */
public ScribeConsumer(String preCommitPathPrefix, String commitPathPrefix, String topic,
int partition, List<HostPort> brokerList, long commitCheckPointInterval, boolean cleanStart,
String hdfsPath) {
this(preCommitPathPrefix, commitPathPrefix, topic, partition, brokerList,
commitCheckPointInterval);
this.cleanStart = cleanStart;
this.hdfsPath = hdfsPath;
}
/**
 * Sets the partitioner that decides the committed-data sub-directory.
 * Call this method before calling init().
 * @param p partitioner to use; if never called, init() installs DumbPartitioner
 */
public void setPartitioner(AbstractPartitioner p) {
this.partitioner = p;
}
/**
 * Overrides the commit-log factory (normally chosen by init()).
 * @param factory factory producing the write-ahead commit log
 */
public void setScribeCommitLogFactory(AbstractScribeCommitLogFactory factory) {
scribeCommitLogFactory = factory;
}
/**
 * Overrides the file-system factory (normally chosen by init()).
 * @param factory factory producing the FileSystem used for data files
 */
public void setFileSystemFactory(AbstractFileSystemFactory factory) {
fileSystemFactory = factory;
}
/**
 * Finishes configuration before the consumer is started: installs the default
 * partitioner when none was set, then wires up the commit-log and file-system
 * factories — HDFS-backed when an hdfsPath was configured, local otherwise.
 *
 * @throws IOException if a factory cannot be created
 */
public void init() throws IOException {
  if (partitioner == null) {
    partitioner = new DumbPartitioner();
  }
  // The commit log lives next to the tmp data; prefix it with the HDFS URI when one is set.
  String commitLogPath =
      (this.hdfsPath == null) ? getCommitLogAbsPath() : this.hdfsPath + getCommitLogAbsPath();
  setScribeCommitLogFactory(ScribeCommitLogFactory.instance(commitLogPath));
  if (this.hdfsPath == null) {
    setFileSystemFactory(FileSystemFactory.instance());
  } else {
    setFileSystemFactory(HDFSFileSystemFactory.instance());
  }
}
/**
 * Locates the current leader broker for the configured topic/partition and
 * opens a SimpleConsumer connection to it, remembering the replica brokers
 * for later failover.
 *
 * @return true when a leader was found and the consumer was created,
 *         false when no usable metadata could be obtained
 */
public boolean connectToTopic() {
  PartitionMetadata metadata = findLeader(brokerList, topic, partition);
  if (metadata == null) {
    LOG.error("Can't find meta data for Topic: " + topic + " partition: " + partition
        + ". In fact, meta is null.");
    return false;
  }
  if (metadata.leader() == null) {
    LOG.error("Can't find meta data for Topic: " + topic + " partition: " + partition);
    return false;
  }
  // Keep the replica set so findNewLeader() can recover from a leader change.
  storeReplicaBrokers(metadata);
  consumer = new SimpleConsumer(
      metadata.leader().host(),
      metadata.leader().port(),
      10000, // timeout
      64 * 1024, // buffersize
      getClientName());
  //scheduleCommitTimer();
  return true;
}
/**
 * Starts the consumer's main loop on a dedicated background thread.
 * Any exception escaping the loop is logged with its stack trace through the
 * class logger (previously it was only dumped to stderr via printStackTrace).
 */
public void start() {
  serverThread = new Thread() {
    public void run() {
      try {
        runServerLoop();
      } catch (Exception e) {
        // Route the failure into the application log so it is not lost.
        LOG.error("Server loop terminated with an exception", e);
      }
    }
  };
  serverThread.start();
}
/**
 * Returns the state of the background server thread.
 * NOTE(review): throws NullPointerException if called before start().
 * @return the Thread.State of the thread running runServerLoop()
 */
public Thread.State getServerState() {
return serverThread.getState();
}
/**
 * Stops the consumer: cancels both timers, interrupts the server thread, and
 * performs a final commit so already-fetched data is not lost.
 *
 * Fixes: the partitioner-update timer was previously never cancelled, leaving
 * a live (non-daemon) Timer thread that could keep firing commit() after
 * shutdown. Exceptions are now logged instead of printed to stderr.
 */
public void stop() {
  try {
    checkPointIntervalTimer.cancel();
  } catch (Exception e) {
    LOG.error("Failed to cancel checkpoint timer", e);
  }
  try {
    partitionerUpdateTimer.cancel();
  } catch (Exception e) {
    LOG.error("Failed to cancel partitioner update timer", e);
  }
  try {
    serverThread.interrupt();
  } catch (Exception e) {
    LOG.error("Failed to interrupt server thread", e);
  }
  // Flush whatever was consumed since the last checkpoint.
  commit();
}
/**
 * Schedules a one-shot commit after commitCheckPointInterval milliseconds.
 * commit() itself calls this again, forming the periodic checkpoint loop.
 */
private void scheduleCommitTimer() {
checkPointIntervalTimer.schedule(new TimerTask() {
@Override
public void run() {
commit();
}
}, commitCheckPointInterval);
}
/**
 * Schedules a one-shot commit when the partitioner's refresh period elapses,
 * so data is flushed before the partition path changes (e.g. date rollover).
 * Does nothing when the partitioner has no refresh period.
 */
private void schedulePartitionUpdateTime() {
if (this.partitioner.getRefresh() == null) {
return;
}
this.partitionerUpdateTimer.schedule(new TimerTask() {
@Override
public void run() {
commit();
}
}, partitioner.getRefresh());
}
/**
 * Atomically publishes a tmp data file by renaming it to its destination,
 * creating the destination's parent directory if needed. A missing source is
 * treated as "nothing to commit" (it may already have been moved).
 *
 * Fixes: the boolean result of FileSystem.rename() was silently discarded —
 * a failed rename is now logged; IOExceptions are logged with their cause
 * instead of printStackTrace() (the resolved TODOs).
 *
 * @param src  path of the tmp data file
 * @param dest final path for the committed data file
 */
private void commitData(String src, String dest) {
  LOG.info(">> atomic commit: " + this.topic);
  FileSystem fs = null;
  Path srcPath = new Path(src);
  Path destPath = new Path(dest);
  try {
    fs = fileSystemFactory.build(URI.create(src));
    //If source doesn't exist, don't commit
    if (!fs.exists(srcPath)) {
      return;
    }
    if (!fs.exists(destPath.getParent())) {
      fs.mkdirs(destPath.getParent());
    }
    // rename() reports failure via its return value, not an exception.
    if (!fs.rename(srcPath, destPath)) {
      LOG.error("Failed to rename " + src + " to " + dest);
    }
  } catch (IOException e) {
    LOG.error("Error while committing data from " + src + " to " + dest, e);
  } finally {
    if (fs != null) {
      try {
        fs.close();
      } catch (IOException e) {
        LOG.error("Error closing file system after commit", e);
      }
    }
  }
}
/**
 * Checkpoints the data consumed since the last commit. The sequence is
 * order-sensitive: first the intended move is recorded in the write-ahead
 * commit log, then the tmp data file is renamed to its destination, then new
 * tmp/destination paths are generated for the next batch. Synchronized so a
 * timer-driven commit cannot interleave with the fetch loop's writes.
 * Always reschedules the next commit timer, even when nothing was committed.
 */
private synchronized void commit() {
//LOG.info(">> committing: " + this.topic);
// Commit when new offsets exist, or on the very first commit of a clean start
// (both offsets still 0) so the initial tmp file is still published.
if (lastCommittedOffset != offset || (this.cleanStart == true && offset == 0 && lastCommittedOffset == 0)) {
//Commit
// First record the to-be taken action in the WAL.
// Then, mv the tmp data file to it's location.
long startOffset = lastCommittedOffset + 1;
long endOffset = offset;
LOG.info("\tstartOffset : " + String.valueOf(startOffset)); //xxx
LOG.info("\tendOffset : " + String.valueOf(endOffset)); //xxx
LOG.info("\ttmpDataPath : " + currTmpDataPath); //xxx
LOG.info("\tDataPath : " + currDataPath); //xxx
ScribeCommitLog log = null;
try {
log = scribeCommitLogFactory.build();
} catch (IOException e) {
LOG.error(e.getMessage());
e.printStackTrace();
}
try {
// WAL entry lets getLatestOffsetFromCommitLog() replay/complete this move on restart.
log.record(startOffset, endOffset, currTmpDataPath, currDataPath);
} catch (NoSuchAlgorithmException | IOException e) {
LOG.error(e.getMessage());
e.printStackTrace();
}
LOG.info(">> committing scribelog: " + this.topic);
commitData(currTmpDataPath, currDataPath);
lastCommittedOffset = offset;
// Rotate to fresh tmp/destination paths for the next batch.
generateTmpAndDestDataPaths();
}
try{
scheduleCommitTimer();
} catch (IllegalStateException e){
// The timer throws IllegalStateException once cancelled (i.e. after stop()).
LOG.error("Could not reschedule commit timer. Ignore if this is during ScribeConsumer shutdown.");
}
}
/**
 * Builds the Kafka client id, unique per topic/partition pair.
 * @return "scribe_&lt;topic&gt;_&lt;partition&gt;"
 */
private String getClientName() {
return "scribe_" + topic + "_" + partition;
}
/**
 * Builds the commit-log path for this consumer under the pre-commit prefix.
 * @return absolute path of this consumer's commit log file
 */
private String getCommitLogAbsPath() {
return PRE_COMMIT_PATH_PREFIX + "/" + getClientName() + ".log";
}
/**
 * Rotates currTmpDataPath / currDataPath to a fresh, timestamp-named pair:
 * the tmp file under the pre-commit prefix and the destination under the
 * commit prefix inside the partitioner-chosen sub-directory. Both are
 * prefixed with the HDFS URI when one is configured. Also (re)schedules the
 * partitioner refresh commit.
 */
private void generateTmpAndDestDataPaths() {
  long timestamp = System.currentTimeMillis();// / 1000L;
  String fileName = "/scribe.data." + topic + "." + timestamp;
  String tmpPath = PRE_COMMIT_PATH_PREFIX + fileName;
  String destPath = COMMIT_PATH_PREFIX + "/" + this.partitioner.getPartition() + fileName;
  if (this.hdfsPath != null) {
    tmpPath = this.hdfsPath + tmpPath;
    destPath = this.hdfsPath + destPath;
  }
  this.currTmpDataPath = tmpPath;
  this.currDataPath = destPath;
  schedulePartitionUpdateTime();
}
/**
 * Glob pattern matching every tmp data file this consumer may have written;
 * used by DeleteUncommittedData() to clean up after a crash or clean start.
 * @return glob pattern for tmp data files under the pre-commit prefix
 */
private String getTmpDataPathPattern() {
return PRE_COMMIT_PATH_PREFIX + "/scribe.data.*";
}
/**
 * Asks Kafka for the latest offset of the given topic/partition via an
 * OffsetRequest on the current consumer connection.
 *
 * Cleanup: removed a large block of commented-out ErrorMapping debug prints
 * (dead code); behavior is unchanged.
 *
 * @param topic     topic to query
 * @param partition partition to query
 * @param startTime offset-request time filter, e.g. kafka.api.OffsetRequest.LatestTime()
 * @return the latest offset, or 0 when the request reported an error
 *         (callers treat 0 as "start from the beginning")
 */
private long getLatestOffsetFromKafka(String topic, int partition, long startTime) {
  TopicAndPartition tp = new TopicAndPartition(topic, partition);
  Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo =
      new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
  requestInfo.put(tp, new PartitionOffsetRequestInfo(startTime, 1));
  OffsetRequest req = new OffsetRequest(
      requestInfo, kafka.api.OffsetRequest.CurrentVersion(), getClientName());
  OffsetResponse resp = consumer.getOffsetsBefore(req);
  if (resp.hasError()) {
    // See kafka.common.ErrorMapping for the meaning of the code.
    LOG.error("error when fetching offset: " + resp.errorCode(topic, partition)); //xxx
    return 0;
  }
  return resp.offsets(topic, partition)[0];
}
/**
 * Asks Kafka for the earliest available offset of the given topic/partition.
 *
 * @param topic     topic to query
 * @param partition partition to query
 * @param startTime offset-request time filter, e.g. kafka.api.OffsetRequest.EarliestTime()
 * @return the earliest offset, or 0 when the request reported an error
 */
private long getEarliestOffsetFromKafka(String topic, int partition, long startTime) {
  LOG.info("getEarliestOffsetFromKafka.");
  Map<TopicAndPartition, PartitionOffsetRequestInfo> offsetInfoByPartition =
      new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
  offsetInfoByPartition.put(
      new TopicAndPartition(topic, partition), new PartitionOffsetRequestInfo(startTime, 1));
  OffsetRequest request = new OffsetRequest(
      offsetInfoByPartition, kafka.api.OffsetRequest.CurrentVersion(), getClientName());
  OffsetResponse response = consumer.getOffsetsBefore(request);
  if (response.hasError()) {
    LOG.error("error when fetching offset: " + response.errorCode(topic, partition)); //xxx
    return 0;
  }
  long earliestOffset = response.offsets(topic, partition)[0];
  LOG.info("Earliest offset " + earliestOffset);
  return earliestOffset;
}
/**
 * Deletes every tmp data file matching this consumer's tmp-path glob, then
 * rotates to fresh paths so we never write into a stale tmp file.
 * (Name keeps its legacy capitalization because sibling methods call it.)
 *
 * Fixes: the FileSystem handle leaked if globStatus() threw (no finally);
 * globStatus() may return null when nothing matches, which previously caused
 * a NullPointerException on .length.
 *
 * @throws IOException if the file system cannot be built or closed
 */
private void DeleteUncommittedData() throws IOException
{
  FileSystem fs;
  if (this.hdfsPath != null) {
    fs = fileSystemFactory.build(URI.create(this.hdfsPath));
  }
  else {
    fs = fileSystemFactory.build();
  }
  try {
    // Corrupted log file
    // Clean up. Delete the tmp data file if present.
    FileStatus[] fileStatusArry = fs.globStatus(new Path(getTmpDataPathPattern()));
    //So we don't try to write to a bad path
    generateTmpAndDestDataPaths();
    if (fileStatusArry != null) {
      for (FileStatus fileStatus : fileStatusArry) {
        try {
          fs.delete(fileStatus.getPath(), false);
        } catch (Exception e) {
          LOG.error("Failed to delete " + fileStatus.getPath(), e);
        }
      }
    }
  } finally {
    fs.close();
  }
}
/**
 * Walks backwards through the commit log until an entry with a valid checksum
 * is found. If checksum computation throws, the most recently fetched entry
 * (possibly invalid, possibly null) is returned, matching the original
 * best-effort behavior.
 *
 * @param log commit log that has already been read
 * @return the newest checksum-valid entry, or null when none exists
 */
private ScribeLogEntry getLatestValidEntry(ScribeCommitLog log) {
  ScribeLogEntry candidate = null;
  try {
    candidate = log.getLatestEntry();
    while (candidate != null && !candidate.isCheckSumValid()) {
      candidate = log.getLatestEntry();
    }
  } catch (NoSuchAlgorithmException e) {
    LOG.error(e.getMessage());
    e.printStackTrace();
  }
  return candidate;
}
/**
 * Crash recovery: inspects the commit log's newest entry and finishes or
 * undoes whatever the previous run left half-done. If the newest entry is
 * valid and its tmp file still exists, the recorded move is replayed;
 * otherwise stray tmp files are deleted. When the newest entry is corrupt,
 * cleanup runs and the newest valid entry is used instead.
 *
 * @return the end offset of the entry recovered from, or -1 when the log has
 *         no usable entry
 */
private long getLatestOffsetFromCommitLog() {
// Here's where we do recovery.
long r = -1;
ScribeLogEntry entry = null;
try {
ScribeCommitLog log = scribeCommitLogFactory.build();
log.read();
entry = log.getLatestEntry();
if (entry != null && entry.isCheckSumValid()) {
String tmpDataFilePath = entry.getSrcPath();
FileSystem fs = fileSystemFactory.build(URI.create(tmpDataFilePath));
if (fs.exists(new Path(tmpDataFilePath))) {
// mv to the dest
commitData(tmpDataFilePath, entry.getDestPath());
} else {
// Data has been committed
// Or, it never got around to write to the log.
// Delete tmp data file just in case.
DeleteUncommittedData();
}
} else {
// Newest entry corrupt (or log empty): clean up and fall back to the
// newest entry whose checksum still validates.
DeleteUncommittedData();
entry = getLatestValidEntry(log);
}
} catch (IOException e) {
LOG.error(e.getMessage());
e.printStackTrace();
} catch (NoSuchAlgorithmException e) {
LOG.error(e.getMessage());
e.printStackTrace();
}
if (entry != null) {
r = entry.getEndOffset();
}
return r;
}
/**
 * Reconciles the offset recovered from the commit log with the offset Kafka
 * reports. The commit-log offset wins only when it exists (-1 means "no
 * usable entry") and is behind Kafka's; in every other case Kafka's offset is
 * used.
 *
 * Fixes: collapses the redundant ==/else branches (both returned the Kafka
 * offset) and implements the former TODO by warning when the commit-log
 * offset is ahead of Kafka's, which indicates external tampering or data loss
 * on the broker.
 *
 * @param topic     topic to query
 * @param partition partition to query
 * @param startTime offset-request time filter passed to Kafka
 * @return the offset consumption should resume from
 */
private long getLatestOffset(String topic, int partition, long startTime) {
  long offsetFromCommitLog = getLatestOffsetFromCommitLog();
  LOG.info(" getLatestOffsetFromCommitLog >>>> " + offsetFromCommitLog); //xxx
  long offsetFromKafka = getLatestOffsetFromKafka(topic, partition, startTime);
  if (offsetFromCommitLog > offsetFromKafka) {
    LOG.warn("Commit log offset " + offsetFromCommitLog + " is ahead of Kafka offset "
        + offsetFromKafka + ". Someone may have tampered with Kafka's offsets; "
        + "falling back to Kafka's offset.");
  }
  if (offsetFromCommitLog != -1 && offsetFromCommitLog < offsetFromKafka) {
    return offsetFromCommitLog;
  }
  return offsetFromKafka;
}
/**
 * Queries each seed broker in turn for topic metadata and returns the
 * metadata of the first broker that knows the requested partition. Errors
 * talking to an individual broker are logged and the next seed is tried.
 *
 * @param seedBrokers brokers to ask, in order
 * @param topic topic to look up
 * @param partition partition id to look up
 * @return the partition's metadata (including its leader), or null when no
 *         seed broker could provide it
 */
private PartitionMetadata findLeader(List<HostPort> seedBrokers, String topic, int partition) {
PartitionMetadata returnMetaData = null;
for (HostPort broker : seedBrokers) {
SimpleConsumer consumer = null;
String seed = broker.getHost();
int port = broker.getPort();
try {
// Short-lived connection used only for the metadata lookup.
consumer = new SimpleConsumer(seed, port, 100000, 64 * 1024, "leaderLookup");
List<String> topics = Collections.singletonList(topic);
TopicMetadataRequest req = new TopicMetadataRequest(topics);
kafka.javaapi.TopicMetadataResponse resp = consumer.send(req);
List<TopicMetadata> metaData = resp.topicsMetadata();
for (TopicMetadata item : metaData) {
for (PartitionMetadata part : item.partitionsMetadata()) {
if (part.partitionId() == partition) {
returnMetaData = part;
return returnMetaData;
}
}
}
} catch (Exception e) {
// Broad catch is deliberate: any failure on this seed should not stop
// us from trying the remaining brokers.
LOG.error("Error communicating with Broker " + seed + ":" + port
+ " while trying to find leader for " + topic
+ ", " + partition + " | Reason: " + e);
} finally {
if (consumer != null)
consumer.close();
}
}
return returnMetaData;
}
/**
 * After losing the leader at oldHost:oldPort, polls the known replica brokers
 * (up to 3 attempts, 1s apart) for a NEW leader. Seeing the old address again
 * on early attempts is treated as "ZooKeeper hasn't caught up yet" and we
 * sleep and retry rather than reconnecting to a possibly stale leader.
 *
 * Fixes: InterruptedException during the back-off sleep was swallowed; the
 * interrupt status is now restored so callers can observe it.
 *
 * @param oldHost host of the failed leader
 * @param oldPort port of the failed leader
 * @return host/port of the newly elected leader
 * @throws LostLeadershipException when no new leader appears within 3 attempts
 */
private HostPort findNewLeader(String oldHost, int oldPort) throws LostLeadershipException {
  for (int i = 0; i < 3; i++) {
    boolean goToSleep = false;
    PartitionMetadata metadata = findLeader(replicaBrokers, topic, partition);
    if (metadata == null) {
      goToSleep = true;
    } else if (metadata.leader() == null) {
      goToSleep = true;
    } else if (oldHost.equalsIgnoreCase(metadata.leader().host()) &&
        oldPort == metadata.leader().port()) {
      // first time through if the leader hasn't changed give ZooKeeper a second to recover
      // second time, assume the broker did recover before failover, or it was a non-Broker issue
      goToSleep = true;
    } else {
      return new HostPort(metadata.leader().host(), metadata.leader().port());
    }
    if (goToSleep) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ie) {
        // Preserve the interrupt flag for the caller instead of swallowing it.
        Thread.currentThread().interrupt();
      }
    }
  }
  // Can't recover from a leadership disappearance.
  throw new LostLeadershipException();
}
/**
 * Replaces the cached replica-broker list with the replicas reported in the
 * given partition metadata; findNewLeader() consults this list on failover.
 * @param metadata freshly fetched metadata for the consumed partition
 */
private void storeReplicaBrokers(PartitionMetadata metadata) {
replicaBrokers.clear();
for (Broker replica : metadata.replicas()) {
replicaBrokers.add(new HostPort(replica.host(), replica.port()));
}
}
/**
 * Main consume loop. Connects to the partition leader (retrying up to 10
 * times), establishes the starting offset (earliest offset plus state wipe on
 * clean start, otherwise the reconciled commit-log/Kafka offset), then
 * fetches messages forever: each batch is appended to the current tmp data
 * file, offsets are advanced, and the commit timer periodically publishes the
 * tmp file. Fetch errors either reset the offset (out-of-range) or trigger a
 * leader failover.
 *
 * @throws IOException on file-system or commit-log failures
 * @throws LostLeadershipException when no new leader can be found after the
 *         current one disappears
 * @throws InterruptedException when the startup retry sleep is interrupted
 */
public void runServerLoop() throws IOException, LostLeadershipException, InterruptedException {
LOG.info("Are we clean starting? " + cleanStart);
int retry = 0;
int retryLimit = 10;
// Keep trying to reach a leader; give up (return) after retryLimit attempts.
while (!connectToTopic()) {
LOG.error("Could not connect to topic...");
Thread.sleep(1000);
retry++;
if (retry > retryLimit) {
return;
}
}
generateTmpAndDestDataPaths();
// lastCommittedOffset = getLatestOffset(topic, partition, kafka.api.OffsetRequest.LatestTime());
//TODO clear commit log
if (cleanStart) {
// Clean start: rewind to the earliest offset and wipe tmp data + commit log.
offset = getEarliestOffsetFromKafka(topic, partition, kafka.api.OffsetRequest.EarliestTime());
lastCommittedOffset = offset;
DeleteUncommittedData();
clearCommitLog();
}
else {
lastCommittedOffset = getLatestOffset(topic, partition, kafka.api.OffsetRequest.LatestTime());
long earliestOffset =
getEarliestOffsetFromKafka(topic, partition, kafka.api.OffsetRequest.EarliestTime());
// Resuming at the very first offset without --clean_start means our saved
// state is inconsistent with the broker; refuse to continue silently.
if (earliestOffset == lastCommittedOffset) {
LOG.error("Scribe consumer is consuming from the first offset yet --clean_start was not defined. Throwing a IllegalStateException."+
"This error means that we have detected an abnormality - normally our latestOffset shouldn't match the lastCommittedOffset.");
throw new IllegalStateException(
"Scribe consumer is consuming from the first offset yet --clean_start was not defined.");
}
offset = lastCommittedOffset;
}
LOG.info(">> lastCommittedOffset: " + lastCommittedOffset); //xxx
scheduleCommitTimer();
while (true) {
//LOG.info(">> offset: " + offset); //xxx
FetchRequest req = new FetchRequestBuilder()
.clientId(getClientName())
.addFetch(topic, partition, offset, 100000)
.build();
FetchResponse resp = consumer.fetch(req);
if (resp.hasError()) {
//If we got an invalid offset, reset it by asking for the last element.
//For all other errors, assume the worst and find ourselves a new leader from the replica.
short code = resp.errorCode(topic, partition);
LOG.info("Encounter error when fetching from consumer. Error Code: " + code);
if (code == ErrorMapping.OffsetOutOfRangeCode()) {
// We asked for an invalid offset. For simple case ask for the last element to reset
LOG.info("inside errormap");
offset = getLatestOffsetFromKafka(topic, partition, kafka.api.OffsetRequest.LatestTime());
continue;
} else {
// Drop the broken connection and fail over to the new leader.
String oldHost = consumer.host();
int oldPort = consumer.port();
consumer.close();
consumer = null;
HostPort newHostPort = findNewLeader(oldHost, oldPort);
consumer = new SimpleConsumer(
newHostPort.getHost(),
newHostPort.getPort(),
10000, // timeout
64 * 1024, // buffersize
getClientName());
continue;
}
}
long msgReadCnt = 0;
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
// Synchronized against commit() so offsets and the tmp file stay consistent.
synchronized (this) {
for (MessageAndOffset messageAndOffset : resp.messageSet(topic, partition)) {
long currentOffset = messageAndOffset.offset();
// Kafka may re-deliver messages below the requested offset; skip them.
if (currentOffset < offset) {
LOG.info("Found an old offset: " + currentOffset + "Expecting: " + offset);
continue;
}
offset = messageAndOffset.nextOffset();
ByteBuffer payload = messageAndOffset.message().payload();
byte[] bytes = new byte[payload.limit()];
payload.get(bytes);
outputStream.write(bytes);
outputStream.write("\n".getBytes());
LOG.info("Concatenating for tmp string: "+String.valueOf(messageAndOffset.offset()) + ": " + new String(bytes));
// Write to HDFS /tmp partition
msgReadCnt++;
}// for
}
if (msgReadCnt == 0) {
try {
Thread.sleep(1000); //Didn't read anything, so go to sleep for awhile.
} catch (InterruptedException e) {
}
}
else{
// Append the whole batch to the current tmp data file in one shot.
LOG.info("Writing to tmp: "+new String(outputStream.toByteArray()));
StringRecordWriter writer = new StringRecordWriter(currTmpDataPath);
writer.write(outputStream.toByteArray());
writer.close();
outputStream.close();
}
} // while
}
/**
 * Truncates the write-ahead commit log; used during a clean start.
 * @throws IOException if the commit log cannot be built or cleared
 */
private void clearCommitLog() throws IOException {
ScribeCommitLog log = scribeCommitLogFactory.build();
log.clear();
}
/**
 * Toggles clean-start mode. Must be called before runServerLoop().
 * @param b true to restart from the earliest Kafka offset and wipe state
 */
public void cleanStart(boolean b) {
cleanStart = b;
}
/**
 * Command-line entry point: parses arguments with JCommander (printing usage
 * and exiting on bad input), builds and initializes a ScribeConsumer, and
 * runs the server loop on the calling thread.
 *
 * Fixes: InterruptedException was swallowed with printStackTrace(); the
 * interrupt status is now restored and the event logged.
 *
 * @param args command-line arguments, see ScribeConsumerCommandLineArgs
 * @throws IOException if consumer initialization fails
 */
public static void main(String[] args) throws IOException {
  ScribeConsumerCommandLineArgs p = new ScribeConsumerCommandLineArgs();
  JCommander jc = new JCommander(p);
  jc.addConverterFactory(new CustomConvertFactory());
  try {
    jc.parse(args);
  } catch (ParameterException e) {
    System.err.println(e.getMessage());
    jc.usage();
    System.exit(-1);
  }
  ScribeConsumer sc =
      new ScribeConsumer(p.preCommitPrefix, p.commitPrefix, p.topic, p.partition, p.brokerList,
          p.commitCheckPointInterval, p.cleanstart, p.hdfsPath);
  if (p.date_partitioner != null) {
    sc.setPartitioner(new DatePartitioner(p.date_partitioner));
  }
  sc.init();
  try {
    sc.runServerLoop();
  } catch (LostLeadershipException e) {
    LOG.fatal("Leader went away. Couldn't find a new leader!");
  } catch (InterruptedException e) {
    // Restore the interrupt flag and log, instead of dumping to stderr.
    Thread.currentThread().interrupt();
    LOG.error("Interrupted while running the server loop", e);
  }
}
}