package com.linkedin.camus.etl.kafka.mapred;
import com.google.common.base.Strings;
import com.linkedin.camus.coders.CamusWrapper;
import com.linkedin.camus.coders.MessageDecoder;
import com.linkedin.camus.etl.kafka.CamusJob;
import com.linkedin.camus.etl.kafka.coders.KafkaAvroMessageDecoder;
import com.linkedin.camus.etl.kafka.coders.MessageDecoderFactory;
import com.linkedin.camus.etl.kafka.common.EmailClient;
import com.linkedin.camus.etl.kafka.common.EtlKey;
import com.linkedin.camus.etl.kafka.common.EtlRequest;
import com.linkedin.camus.etl.kafka.common.LeaderInfo;
import com.linkedin.camus.workallocater.CamusRequest;
import com.linkedin.camus.workallocater.WorkAllocator;
import java.io.IOException;
import java.net.URI;
import java.security.InvalidParameterException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.common.ErrorMapping;
import kafka.common.TopicAndPartition;
import kafka.javaapi.OffsetRequest;
import kafka.javaapi.OffsetResponse;
import kafka.javaapi.PartitionMetadata;
import kafka.javaapi.TopicMetadata;
import kafka.javaapi.TopicMetadataRequest;
import kafka.javaapi.consumer.SimpleConsumer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.Logger;
/**
* Input format for a Kafka pull job.
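*
* <p>A minimal driver sketch (the job name, broker list, and topic names below are
* illustrative placeholders, not values required by this class):
* <pre>{@code
* Configuration conf = new Configuration();
* conf.set("kafka.brokers", "broker1:9092,broker2:9092");
* Job job = Job.getInstance(conf, "camus-pull");
* job.setInputFormatClass(EtlInputFormat.class);
* EtlInputFormat.setKafkaWhitelistTopic(job, "exampleTopicA,exampleTopicB");
* }</pre>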
*/
public class EtlInputFormat extends InputFormat<EtlKey, CamusWrapper> {
public static final String KAFKA_BLACKLIST_TOPIC = "kafka.blacklist.topics";
public static final String KAFKA_WHITELIST_TOPIC = "kafka.whitelist.topics";
public static final String KAFKA_MOVE_TO_LAST_OFFSET_LIST = "kafka.move.to.last.offset.list";
public static final String KAFKA_MOVE_TO_EARLIEST_OFFSET = "kafka.move.to.earliest.offset";
public static final String KAFKA_CLIENT_BUFFER_SIZE = "kafka.client.buffer.size";
public static final String KAFKA_CLIENT_SO_TIMEOUT = "kafka.client.so.timeout";
public static final String KAFKA_MAX_PULL_HRS = "kafka.max.pull.hrs";
public static final String KAFKA_MAX_PULL_MINUTES_PER_TASK = "kafka.max.pull.minutes.per.task";
public static final String KAFKA_MAX_HISTORICAL_DAYS = "kafka.max.historical.days";
public static final String CAMUS_MESSAGE_DECODER_CLASS = "camus.message.decoder.class";
public static final String ETL_IGNORE_SCHEMA_ERRORS = "etl.ignore.schema.errors";
public static final String ETL_AUDIT_IGNORE_SERVICE_TOPIC_LIST = "etl.audit.ignore.service.topic.list";
public static final String CAMUS_WORK_ALLOCATOR_CLASS = "camus.work.allocator.class";
public static final String CAMUS_WORK_ALLOCATOR_DEFAULT = "com.linkedin.camus.workallocater.BaseAllocator";
private static final int BACKOFF_UNIT_MILLISECONDS = 1000;
public static final int NUM_TRIES_PARTITION_METADATA = 3;
public static final int NUM_TRIES_FETCH_FROM_LEADER = 3;
public static final int NUM_TRIES_TOPIC_METADATA = 3;
public static boolean reportJobFailureDueToOffsetOutOfRange = false;
public static boolean reportJobFailureUnableToGetOffsetFromKafka = false;
public static boolean reportJobFailureDueToLeaderNotAvailable = false;
private static Logger log = null;
public EtlInputFormat() {
if (log == null)
log = Logger.getLogger(getClass());
}
public static void setLogger(Logger log) {
EtlInputFormat.log = log;
}
@Override
public RecordReader<EtlKey, CamusWrapper> createRecordReader(InputSplit split, TaskAttemptContext context)
throws IOException, InterruptedException {
return new EtlRecordReader(this, split, context);
}
/**
* Gets the topic metadata from Kafka, trying each configured broker in random order until
* one responds.
*
* @param context the job context
* @param metaRequestTopics the topics to fetch metadata for; an empty list fetches metadata
* for all topics
* @return the list of TopicMetadata
*/
public List<TopicMetadata> getKafkaMetadata(JobContext context, List<String> metaRequestTopics) {
CamusJob.startTiming("kafkaSetupTime");
String brokerString = CamusJob.getKafkaBrokers(context);
if (brokerString.isEmpty())
throw new InvalidParameterException("kafka.brokers must contain at least one node");
List<String> brokers = Arrays.asList(brokerString.split("\\s*,\\s*"));
Collections.shuffle(brokers);
boolean fetchMetaDataSucceeded = false;
int i = 0;
List<TopicMetadata> topicMetadataList = null;
Exception savedException = null;
while (i < brokers.size() && !fetchMetaDataSucceeded) {
SimpleConsumer consumer = createBrokerConsumer(context, brokers.get(i));
log.info(String.format("Fetching metadata from broker %s with client id %s for %d topic(s) %s", brokers.get(i),
consumer.clientId(), metaRequestTopics.size(), metaRequestTopics));
try {
for (int iter = 0; iter < NUM_TRIES_TOPIC_METADATA; iter++) {
try {
topicMetadataList = consumer.send(new TopicMetadataRequest(metaRequestTopics)).topicsMetadata();
fetchMetaDataSucceeded = true;
break;
} catch (Exception e) {
savedException = e;
log.warn(String.format(
"Fetching topic metadata with client id %s for topics [%s] from broker [%s] failed, iter[%s]",
consumer.clientId(), metaRequestTopics, brokers.get(i), iter), e);
try {
Thread.sleep((long) (Math.random() * (iter + 1) * 1000));
} catch (InterruptedException ex) {
log.warn("Caught InterruptedException: " + ex);
}
}
}
} finally {
consumer.close();
i++;
}
}
if (!fetchMetaDataSucceeded) {
throw new RuntimeException("Failed to obtain metadata!", savedException);
}
CamusJob.stopTiming("kafkaSetupTime");
return topicMetadataList;
}
private SimpleConsumer createBrokerConsumer(JobContext context, String broker) {
if (!broker.matches(".+:\\d+"))
throw new InvalidParameterException("The kakfa broker " + broker + " must follow address:port pattern");
String[] hostPort = broker.split(":");
return createSimpleConsumer(context, hostPort[0], Integer.valueOf(hostPort[1]));
}
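/**
* Creates a {@link SimpleConsumer} for the given host and port, using the Kafka timeout,
* buffer size, and client name configured on the job via {@link CamusJob}.
*/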
public SimpleConsumer createSimpleConsumer(JobContext context, String host, int port) {
SimpleConsumer consumer =
new SimpleConsumer(host, port, CamusJob.getKafkaTimeoutValue(context), CamusJob.getKafkaBufferSize(context),
CamusJob.getKafkaClientName(context));
return consumer;
}
/**
* Gets the latest and earliest offsets from each partition leader and creates the
* corresponding ETL requests.
*
* @param context the job context
* @param offsetRequestInfo the topic partitions to request offsets for, grouped by leader
* @return the list of ETL requests, one per topic partition
*/
public ArrayList<CamusRequest> fetchLatestOffsetAndCreateEtlRequests(JobContext context,
HashMap<LeaderInfo, ArrayList<TopicAndPartition>> offsetRequestInfo) {
ArrayList<CamusRequest> finalRequests = new ArrayList<CamusRequest>();
for (LeaderInfo leader : offsetRequestInfo.keySet()) {
SimpleConsumer consumer = createSimpleConsumer(context, leader.getUri().getHost(), leader.getUri().getPort());
// Latest Offset
PartitionOffsetRequestInfo partitionLatestOffsetRequestInfo =
new PartitionOffsetRequestInfo(kafka.api.OffsetRequest.LatestTime(), 1);
// Earliest Offset
PartitionOffsetRequestInfo partitionEarliestOffsetRequestInfo =
new PartitionOffsetRequestInfo(kafka.api.OffsetRequest.EarliestTime(), 1);
Map<TopicAndPartition, PartitionOffsetRequestInfo> latestOffsetInfo =
new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
Map<TopicAndPartition, PartitionOffsetRequestInfo> earliestOffsetInfo =
new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
ArrayList<TopicAndPartition> topicAndPartitions = offsetRequestInfo.get(leader);
for (TopicAndPartition topicAndPartition : topicAndPartitions) {
latestOffsetInfo.put(topicAndPartition, partitionLatestOffsetRequestInfo);
earliestOffsetInfo.put(topicAndPartition, partitionEarliestOffsetRequestInfo);
}
OffsetResponse latestOffsetResponse = getLatestOffsetResponse(consumer, latestOffsetInfo, context);
OffsetResponse earliestOffsetResponse = null;
if (latestOffsetResponse != null) {
earliestOffsetResponse = getLatestOffsetResponse(consumer, earliestOffsetInfo, context);
}
consumer.close();
if (earliestOffsetResponse == null) {
log.warn(generateLogWarnForSkippedTopics(earliestOffsetInfo, consumer));
reportJobFailureUnableToGetOffsetFromKafka = true;
continue;
}
for (TopicAndPartition topicAndPartition : topicAndPartitions) {
long latestOffset = latestOffsetResponse.offsets(topicAndPartition.topic(), topicAndPartition.partition())[0];
long earliestOffset =
earliestOffsetResponse.offsets(topicAndPartition.topic(), topicAndPartition.partition())[0];
//TODO: factor out kafka specific request functionality
CamusRequest etlRequest =
new EtlRequest(context, topicAndPartition.topic(), Integer.toString(leader.getLeaderId()),
topicAndPartition.partition(), leader.getUri());
etlRequest.setLatestOffset(latestOffset);
etlRequest.setEarliestOffset(earliestOffset);
finalRequests.add(etlRequest);
}
}
return finalRequests;
}
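/**
* Issues an offset request to the given leader, retrying up to
* {@link #NUM_TRIES_FETCH_FROM_LEADER} times with randomized backoff between attempts.
*
* @return the offset response, or null if every attempt failed
*/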
protected OffsetResponse getLatestOffsetResponse(SimpleConsumer consumer,
Map<TopicAndPartition, PartitionOffsetRequestInfo> offsetInfo, JobContext context) {
for (int i = 0; i < NUM_TRIES_FETCH_FROM_LEADER; i++) {
try {
OffsetResponse offsetResponse =
consumer.getOffsetsBefore(new OffsetRequest(offsetInfo, kafka.api.OffsetRequest.CurrentVersion(), CamusJob
.getKafkaClientName(context)));
if (offsetResponse.hasError()) {
throw new RuntimeException("offsetReponse has error.");
}
return offsetResponse;
} catch (Exception e) {
log.warn("Fetching offset from leader " + consumer.host() + ":" + consumer.port() + " has failed " + (i + 1)
+ " time(s). Reason: " + e.getMessage() + " " + (NUM_TRIES_FETCH_FROM_LEADER - i - 1) + " retries left.");
if (i < NUM_TRIES_FETCH_FROM_LEADER - 1) {
try {
Thread.sleep((long) (Math.random() * (i + 1) * 1000));
} catch (InterruptedException e1) {
log.error("Caught interrupted exception between retries of getting latest offsets. " + e1.getMessage());
}
}
}
}
return null;
}
private String generateLogWarnForSkippedTopics(Map<TopicAndPartition, PartitionOffsetRequestInfo> offsetInfo,
SimpleConsumer consumer) {
StringBuilder sb = new StringBuilder();
sb.append("The following topics will be skipped due to failure in fetching latest offsets from leader "
+ consumer.host() + ":" + consumer.port());
for (TopicAndPartition topicAndPartition : offsetInfo.keySet()) {
sb.append(" " + topicAndPartition.topic());
}
return sb.toString();
}
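/**
* Builds a single alternation regex from a set of topic patterns. For example, a set
* containing "foo" and "bar" produces "(foo|bar)" (the order depends on set iteration).
* The result is compiled once so that an invalid pattern fails fast.
*/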
public String createTopicRegEx(HashSet<String> topicsSet) {
String regex = "";
StringBuilder stringbuilder = new StringBuilder();
for (String whiteList : topicsSet) {
stringbuilder.append(whiteList);
stringbuilder.append("|");
}
regex = "(" + stringbuilder.substring(0, stringbuilder.length() - 1) + ")";
Pattern.compile(regex);
return regex;
}
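/**
* Keeps only the topics whose names match the whitelist regex built from the given set;
* non-matching topics are logged and discarded.
*/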
public List<TopicMetadata> filterWhitelistTopics(List<TopicMetadata> topicMetadataList,
HashSet<String> whiteListTopics) {
ArrayList<TopicMetadata> filteredTopics = new ArrayList<TopicMetadata>();
String regex = createTopicRegEx(whiteListTopics);
for (TopicMetadata topicMetadata : topicMetadataList) {
if (Pattern.matches(regex, topicMetadata.topic())) {
filteredTopics.add(topicMetadata);
} else {
log.info("Discarding topic : " + topicMetadata.topic());
}
}
return filteredTopics;
}
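/**
* Builds the input splits for the pull job: fetches topic metadata, applies the whitelist
* and blacklist, groups partitions by leader, fetches the earliest and latest offsets,
* reconciles them with the previously persisted offsets, writes the request and offset
* files, and finally delegates split allocation to the configured {@link WorkAllocator}.
*/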
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
CamusJob.startTiming("getSplits");
ArrayList<CamusRequest> finalRequests;
HashMap<LeaderInfo, ArrayList<TopicAndPartition>> offsetRequestInfo =
new HashMap<LeaderInfo, ArrayList<TopicAndPartition>>();
try {
// Get Metadata for all topics
List<TopicMetadata> topicMetadataList = getKafkaMetadata(context, new ArrayList<String>());
// Filter any white list topics
HashSet<String> whiteListTopics = new HashSet<String>(Arrays.asList(getKafkaWhitelistTopic(context)));
if (!whiteListTopics.isEmpty()) {
topicMetadataList = filterWhitelistTopics(topicMetadataList, whiteListTopics);
}
// Filter all blacklist topics
HashSet<String> blackListTopics = new HashSet<String>(Arrays.asList(getKafkaBlacklistTopic(context)));
String regex = "";
if (!blackListTopics.isEmpty()) {
regex = createTopicRegEx(blackListTopics);
}
for (TopicMetadata topicMetadata : topicMetadataList) {
if (Pattern.matches(regex, topicMetadata.topic())) {
log.info("Discarding topic (blacklisted): " + topicMetadata.topic());
} else if (!createMessageDecoder(context, topicMetadata.topic())) {
log.info("Discarding topic (Decoder generation failed) : " + topicMetadata.topic());
} else if (topicMetadata.errorCode() != ErrorMapping.NoError()) {
log.info("Skipping the creation of ETL request for Whole Topic : " + topicMetadata.topic() + " Exception : "
+ ErrorMapping.exceptionFor(topicMetadata.errorCode()));
} else {
for (PartitionMetadata partitionMetadata : topicMetadata.partitionsMetadata()) {
// We only care about LeaderNotAvailableCode error on partitionMetadata level
// Error codes such as ReplicaNotAvailableCode should not stop us.
partitionMetadata =
this.refreshPartitionMetadataOnLeaderNotAvailable(partitionMetadata, topicMetadata, context,
NUM_TRIES_PARTITION_METADATA);
if (partitionMetadata.errorCode() == ErrorMapping.LeaderNotAvailableCode()) {
log.info("Skipping the creation of ETL request for Topic : " + topicMetadata.topic()
+ " and Partition : " + partitionMetadata.partitionId() + " Exception : "
+ ErrorMapping.exceptionFor(partitionMetadata.errorCode()));
reportJobFailureDueToLeaderNotAvailable = true;
} else {
if (partitionMetadata.errorCode() != ErrorMapping.NoError()) {
log.warn("Receiving non-fatal error code, Continuing the creation of ETL request for Topic : "
+ topicMetadata.topic() + " and Partition : " + partitionMetadata.partitionId() + " Exception : "
+ ErrorMapping.exceptionFor(partitionMetadata.errorCode()));
}
LeaderInfo leader =
new LeaderInfo(new URI("tcp://" + partitionMetadata.leader().getConnectionString()),
partitionMetadata.leader().id());
if (offsetRequestInfo.containsKey(leader)) {
ArrayList<TopicAndPartition> topicAndPartitions = offsetRequestInfo.get(leader);
topicAndPartitions.add(new TopicAndPartition(topicMetadata.topic(), partitionMetadata.partitionId()));
offsetRequestInfo.put(leader, topicAndPartitions);
} else {
ArrayList<TopicAndPartition> topicAndPartitions = new ArrayList<TopicAndPartition>();
topicAndPartitions.add(new TopicAndPartition(topicMetadata.topic(), partitionMetadata.partitionId()));
offsetRequestInfo.put(leader, topicAndPartitions);
}
}
}
}
}
} catch (Exception e) {
log.error("Unable to pull requests from Kafka brokers. Exiting the program", e);
throw new IOException("Unable to pull requests from Kafka brokers.", e);
}
// Get the latest offsets and generate the EtlRequests
finalRequests = fetchLatestOffsetAndCreateEtlRequests(context, offsetRequestInfo);
Collections.sort(finalRequests, new Comparator<CamusRequest>() {
@Override
public int compare(CamusRequest r1, CamusRequest r2) {
return r1.getTopic().compareTo(r2.getTopic());
}
});
writeRequests(finalRequests, context);
Map<CamusRequest, EtlKey> offsetKeys = getPreviousOffsets(FileInputFormat.getInputPaths(context), context);
Set<String> moveLatest = getMoveToLatestTopicsSet(context);
String camusRequestEmailMessage = "";
for (CamusRequest request : finalRequests) {
if (moveLatest.contains(request.getTopic()) || moveLatest.contains("all")) {
log.info("Moving to latest for topic: " + request.getTopic());
//TODO: factor out kafka specific request functionality
EtlKey oldKey = offsetKeys.get(request);
EtlKey newKey =
new EtlKey(request.getTopic(), ((EtlRequest) request).getLeaderId(), request.getPartition(), 0,
request.getLastOffset());
if (oldKey != null)
newKey.setMessageSize(oldKey.getMessageSize());
offsetKeys.put(request, newKey);
}
EtlKey key = offsetKeys.get(request);
if (key != null) {
request.setOffset(key.getOffset());
request.setAvgMsgSize(key.getMessageSize());
}
if (request.getEarliestOffset() > request.getOffset() || request.getOffset() > request.getLastOffset()) {
if (request.getEarliestOffset() > request.getOffset()) {
log.error("The earliest offset was found to be more than the current offset: " + request);
} else {
log.error("The current offset was found to be more than the latest offset: " + request);
}
boolean move_to_earliest_offset = context.getConfiguration().getBoolean(KAFKA_MOVE_TO_EARLIEST_OFFSET, false);
boolean offsetUnset = request.getOffset() == EtlRequest.DEFAULT_OFFSET;
log.info("move_to_earliest: " + move_to_earliest_offset + " offset_unset: " + offsetUnset);
// When the offset is unset, this is a new topic/partition, so we also need to consume from the earliest offset
if (move_to_earliest_offset || offsetUnset) {
log.error("Moving to the earliest offset available");
request.setOffset(request.getEarliestOffset());
offsetKeys.put(
request,
//TODO: factor out kafka specific request functionality
new EtlKey(request.getTopic(), ((EtlRequest) request).getLeaderId(), request.getPartition(), 0, request
.getOffset()));
} else {
log.error("Offset range from kafka metadata is outside the previously persisted offset, " + request + "\n" +
" Topic " + request.getTopic() + " will be skipped.\n" +
" Please check whether kafka cluster configuration is correct." +
" You can also specify config parameter: " + KAFKA_MOVE_TO_EARLIEST_OFFSET +
" to start processing from earliest kafka metadata offset.");
reportJobFailureDueToOffsetOutOfRange = true;
}
} else if (3 * (request.getOffset() - request.getEarliestOffset())
< request.getLastOffset() - request.getOffset()) {
camusRequestEmailMessage +=
"The current offset is too close to the earliest offset, Camus might be falling behind: "
+ request + "\n";
}
log.info(request);
}
if (!Strings.isNullOrEmpty(camusRequestEmailMessage)) {
EmailClient.sendEmail(camusRequestEmailMessage);
}
writePrevious(offsetKeys.values(), context);
CamusJob.stopTiming("getSplits");
CamusJob.startTiming("hadoop");
CamusJob.setTime("hadoop_start");
WorkAllocator allocator = getWorkAllocator(context);
Properties props = new Properties();
props.putAll(context.getConfiguration().getValByRegex(".*"));
allocator.init(props);
return allocator.allocateWork(finalRequests, context);
}
private Set<String> getMoveToLatestTopicsSet(JobContext context) {
Set<String> topics = new HashSet<String>();
String[] arr = getMoveToLatestTopics(context);
if (arr != null) {
for (String topic : arr) {
topics.add(topic);
}
}
return topics;
}
private boolean createMessageDecoder(JobContext context, String topic) {
try {
MessageDecoderFactory.createMessageDecoder(context, topic);
return true;
} catch (Exception e) {
log.error("failed to create decoder", e);
return false;
}
}
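/**
* Persists the given offset keys to a sequence file named with the offset prefix and a
* "-previous" suffix under the job output path.
*/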
private void writePrevious(Collection<EtlKey> missedKeys, JobContext context) throws IOException {
FileSystem fs = FileSystem.get(context.getConfiguration());
Path output = FileOutputFormat.getOutputPath(context);
if (!fs.exists(output)) {
fs.mkdirs(output);
}
output = new Path(output, EtlMultiOutputFormat.OFFSET_PREFIX + "-previous");
SequenceFile.Writer writer =
SequenceFile.createWriter(fs, context.getConfiguration(), output, EtlKey.class, NullWritable.class);
for (EtlKey key : missedKeys) {
writer.append(key, NullWritable.get());
}
writer.close();
}
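/**
* Persists the generated ETL requests to the requests sequence file under the job output path.
*/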
protected void writeRequests(List<CamusRequest> requests, JobContext context) throws IOException {
FileSystem fs = FileSystem.get(context.getConfiguration());
Path output = FileOutputFormat.getOutputPath(context);
if (!fs.exists(output)) {
fs.mkdirs(output);
}
output = new Path(output, EtlMultiOutputFormat.REQUESTS_FILE);
SequenceFile.Writer writer =
SequenceFile.createWriter(fs, context.getConfiguration(), output, EtlRequest.class, NullWritable.class);
for (CamusRequest r : requests) {
//TODO: factor out kafka specific request functionality
writer.append(r, NullWritable.get());
}
writer.close();
}
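/**
* Reads the offset files from the given input paths and returns the highest previously
* committed offset for each topic partition.
*/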
private Map<CamusRequest, EtlKey> getPreviousOffsets(Path[] inputs, JobContext context) throws IOException {
Map<CamusRequest, EtlKey> offsetKeysMap = new HashMap<CamusRequest, EtlKey>();
for (Path input : inputs) {
FileSystem fs = input.getFileSystem(context.getConfiguration());
for (FileStatus f : fs.listStatus(input, new OffsetFileFilter())) {
log.info("previous offset file:" + f.getPath().toString());
SequenceFile.Reader reader = new SequenceFile.Reader(fs, f.getPath(), context.getConfiguration());
EtlKey key = new EtlKey();
while (reader.next(key, NullWritable.get())) {
//TODO: factor out kafka specific request functionality
CamusRequest request = new EtlRequest(context, key.getTopic(), key.getLeaderId(), key.getPartition());
if (offsetKeysMap.containsKey(request)) {
EtlKey oldKey = offsetKeysMap.get(request);
if (oldKey.getOffset() < key.getOffset()) {
offsetKeysMap.put(request, key);
}
} else {
offsetKeysMap.put(request, key);
}
key = new EtlKey();
}
reader.close();
}
}
return offsetKeysMap;
}
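/**
* Re-fetches the topic metadata while the partition reports LeaderNotAvailable, up to
* numTries attempts with randomized backoff. Returns the refreshed partition metadata,
* or the original metadata if the leader is still unavailable.
*/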
public PartitionMetadata refreshPartitionMetadataOnLeaderNotAvailable(PartitionMetadata partitionMetadata,
TopicMetadata topicMetadata, JobContext context, int numTries) throws InterruptedException {
int tryCounter = 0;
while (tryCounter < numTries && partitionMetadata.errorCode() == ErrorMapping.LeaderNotAvailableCode()) {
log.info("Retry to referesh the topicMetadata on LeaderNotAvailable...");
List<TopicMetadata> topicMetadataList =
this.getKafkaMetadata(context, Collections.singletonList(topicMetadata.topic()));
if (topicMetadataList == null || topicMetadataList.size() == 0) {
log.warn("The topicMetadataList for topic " + topicMetadata.topic() + " is empty.");
} else {
topicMetadata = topicMetadataList.get(0);
boolean partitionFound = false;
for (PartitionMetadata metadataPerPartition : topicMetadata.partitionsMetadata()) {
if (metadataPerPartition.partitionId() == partitionMetadata.partitionId()) {
partitionFound = true;
if (metadataPerPartition.errorCode() != ErrorMapping.LeaderNotAvailableCode()) {
return metadataPerPartition;
} else { //retry again.
if (tryCounter < numTries - 1) {
Thread.sleep((long) (Math.random() * (tryCounter + 1) * BACKOFF_UNIT_MILLISECONDS));
}
break;
}
}
}
if (!partitionFound) {
log.error("No matching partition found in the topicMetadata for Partition: "
+ partitionMetadata.partitionId());
}
}
tryCounter++;
}
return partitionMetadata;
}
public static void setWorkAllocator(JobContext job, Class<WorkAllocator> val) {
job.getConfiguration().setClass(CAMUS_WORK_ALLOCATOR_CLASS, val, WorkAllocator.class);
}
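/**
* Instantiates the configured {@link WorkAllocator}, falling back to the default
* allocator class when none is set.
*/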
public static WorkAllocator getWorkAllocator(JobContext job) {
try {
return (WorkAllocator) job.getConfiguration()
.getClass(CAMUS_WORK_ALLOCATOR_CLASS, Class.forName(CAMUS_WORK_ALLOCATOR_DEFAULT)).newInstance();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
public static void setMoveToLatestTopics(JobContext job, String val) {
job.getConfiguration().set(KAFKA_MOVE_TO_LAST_OFFSET_LIST, val);
}
public static String[] getMoveToLatestTopics(JobContext job) {
return job.getConfiguration().getStrings(KAFKA_MOVE_TO_LAST_OFFSET_LIST);
}
public static void setKafkaClientBufferSize(JobContext job, int val) {
job.getConfiguration().setInt(KAFKA_CLIENT_BUFFER_SIZE, val);
}
public static int getKafkaClientBufferSize(JobContext job) {
return job.getConfiguration().getInt(KAFKA_CLIENT_BUFFER_SIZE, 2 * 1024 * 1024);
}
public static void setKafkaClientTimeout(JobContext job, int val) {
job.getConfiguration().setInt(KAFKA_CLIENT_SO_TIMEOUT, val);
}
public static int getKafkaClientTimeout(JobContext job) {
return job.getConfiguration().getInt(KAFKA_CLIENT_SO_TIMEOUT, 60000);
}
public static void setKafkaMaxPullHrs(JobContext job, int val) {
job.getConfiguration().setInt(KAFKA_MAX_PULL_HRS, val);
}
public static int getKafkaMaxPullHrs(JobContext job) {
return job.getConfiguration().getInt(KAFKA_MAX_PULL_HRS, -1);
}
public static void setKafkaMaxPullMinutesPerTask(JobContext job, int val) {
job.getConfiguration().setInt(KAFKA_MAX_PULL_MINUTES_PER_TASK, val);
}
public static int getKafkaMaxPullMinutesPerTask(JobContext job) {
return job.getConfiguration().getInt(KAFKA_MAX_PULL_MINUTES_PER_TASK, -1);
}
public static void setKafkaMaxHistoricalDays(JobContext job, int val) {
job.getConfiguration().setInt(KAFKA_MAX_HISTORICAL_DAYS, val);
}
public static int getKafkaMaxHistoricalDays(JobContext job) {
return job.getConfiguration().getInt(KAFKA_MAX_HISTORICAL_DAYS, -1);
}
public static void setKafkaBlacklistTopic(JobContext job, String val) {
job.getConfiguration().set(KAFKA_BLACKLIST_TOPIC, val);
}
public static String[] getKafkaBlacklistTopic(JobContext job) {
return getKafkaBlacklistTopic(job.getConfiguration());
}
public static String[] getKafkaBlacklistTopic(Configuration conf) {
final String blacklistStr = conf.get(KAFKA_BLACKLIST_TOPIC);
if (blacklistStr != null && !blacklistStr.isEmpty()) {
return conf.getStrings(KAFKA_BLACKLIST_TOPIC);
} else {
return new String[] {};
}
}
public static void setKafkaWhitelistTopic(JobContext job, String val) {
job.getConfiguration().set(KAFKA_WHITELIST_TOPIC, val);
}
public static String[] getKafkaWhitelistTopic(JobContext job) {
return getKafkaWhitelistTopic(job.getConfiguration());
}
public static String[] getKafkaWhitelistTopic(Configuration conf) {
final String whitelistStr = conf.get(KAFKA_WHITELIST_TOPIC);
if (whitelistStr != null && !whitelistStr.isEmpty()) {
return conf.getStrings(KAFKA_WHITELIST_TOPIC);
} else {
return new String[] {};
}
}
public static void setEtlIgnoreSchemaErrors(JobContext job, boolean val) {
job.getConfiguration().setBoolean(ETL_IGNORE_SCHEMA_ERRORS, val);
}
public static boolean getEtlIgnoreSchemaErrors(JobContext job) {
return job.getConfiguration().getBoolean(ETL_IGNORE_SCHEMA_ERRORS, false);
}
public static void setEtlAuditIgnoreServiceTopicList(JobContext job, String topics) {
job.getConfiguration().set(ETL_AUDIT_IGNORE_SERVICE_TOPIC_LIST, topics);
}
public static String[] getEtlAuditIgnoreServiceTopicList(JobContext job) {
return job.getConfiguration().getStrings(ETL_AUDIT_IGNORE_SERVICE_TOPIC_LIST, "");
}
public static void setMessageDecoderClass(JobContext job, Class<MessageDecoder> cls) {
job.getConfiguration().setClass(CAMUS_MESSAGE_DECODER_CLASS, cls, MessageDecoder.class);
}
public static Class<MessageDecoder> getMessageDecoderClass(JobContext job) {
return (Class<MessageDecoder>) job.getConfiguration().getClass(CAMUS_MESSAGE_DECODER_CLASS,
KafkaAvroMessageDecoder.class);
}
public static Class<MessageDecoder> getMessageDecoderClass(JobContext job, String topicName) {
Class<MessageDecoder> topicDecoder =
(Class<MessageDecoder>) job.getConfiguration().getClass(CAMUS_MESSAGE_DECODER_CLASS + "." + topicName, null);
return topicDecoder == null ? getMessageDecoderClass(job) : topicDecoder;
}
private class OffsetFileFilter implements PathFilter {
@Override
public boolean accept(Path arg0) {
return arg0.getName().startsWith(EtlMultiOutputFormat.OFFSET_PREFIX);
}
}
}