/*
* Copyright © 2014-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.logging.save;
import co.cask.cdap.api.metrics.MetricsCollectionService;
import co.cask.cdap.api.metrics.MetricsContext;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.proto.Id;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.AbstractIdleService;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import com.google.inject.name.Named;
import org.apache.twill.common.Cancellable;
import org.apache.twill.kafka.client.KafkaClientService;
import org.apache.twill.kafka.client.KafkaConsumer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
/**
* Saves logs published through Kafka.
*/
public final class LogSaver extends AbstractIdleService {
private static final Logger LOG = LoggerFactory.getLogger(LogSaver.class);
private static final int TIMEOUT_SECONDS = 10;
private final String topic;
private final KafkaClientService kafkaClient;
private final Set<Integer> partitions;
private Map<Integer, Cancellable> kafkaCancelMap;
private Map<Integer, CountDownLatch> kafkaCancelCallbackLatchMap;
private Set<KafkaLogProcessor> messageProcessors;
private final MetricsContext metricsContext;
@Inject
LogSaver(KafkaClientService kafkaClient,
CConfiguration cConf,
@Named(Constants.LogSaver.MESSAGE_PROCESSORS) Set<KafkaLogProcessor> messageProcessors,
@Assisted Set<Integer> partitions,
MetricsCollectionService metricsCollectionService)
throws Exception {
LOG.info("Initializing LogSaver...");
this.topic = cConf.get(Constants.Logging.KAFKA_TOPIC);
this.partitions = partitions;
LOG.info(String.format("Kafka topic: %s, partitions: %s", this.topic, this.partitions));
this.kafkaClient = kafkaClient;
this.kafkaCancelMap = new HashMap<>();
this.kafkaCancelCallbackLatchMap = new HashMap<>();
this.messageProcessors = messageProcessors;
// TODO: add instance id of the log saver as a tag, when CDAP-3265 is fixed
this.metricsContext = metricsCollectionService.getContext(
ImmutableMap.of(Constants.Metrics.Tag.NAMESPACE, Id.Namespace.SYSTEM.getId(),
Constants.Metrics.Tag.COMPONENT, Constants.Service.LOGSAVER));
}
@Override
protected void startUp() throws Exception {
LOG.info("Starting LogSaver...");
waitForDatasetAvailability();
scheduleTasks(partitions);
LOG.info("Started LogSaver.");
}
@Override
protected void shutDown() throws Exception {
LOG.info("Stopping LogSaver...");
unscheduleTasks();
LOG.info("Stopped LogSaver.");
}
@VisibleForTesting
void scheduleTasks(Set<Integer> partitions) throws Exception {
subscribe(partitions);
}
@VisibleForTesting
void unscheduleTasks() {
cancelLogCollectorCallbacks();
for (KafkaLogProcessor processor : messageProcessors) {
try {
// Catching the exception to let all the plugins a chance to stop cleanly.
processor.stop();
} catch (Throwable th) {
LOG.error("Error stopping processor {}",
processor.getClass().getSimpleName());
}
}
}
private void cancelLogCollectorCallbacks() {
for (Entry<Integer, Cancellable> entry : kafkaCancelMap.entrySet()) {
if (entry.getValue() != null) {
LOG.info("Cancelling kafka callback for partition {}", entry.getKey());
kafkaCancelCallbackLatchMap.get(entry.getKey()).countDown();
entry.getValue().cancel();
}
}
kafkaCancelMap.clear();
kafkaCancelCallbackLatchMap.clear();
}
private void subscribe(Set<Integer> partitions) throws Exception {
LOG.info("Prepare to subscribe for partitions: {}", partitions);
for (KafkaLogProcessor processor : messageProcessors) {
processor.init(partitions);
}
Map<Integer, Long> partitionOffset = Maps.newHashMap();
for (int part : partitions) {
KafkaConsumer.Preparer preparer = kafkaClient.getConsumer().prepare();
long offset = getLowestCheckpointOffset(part);
partitionOffset.put(part, offset);
if (offset >= 0) {
preparer.add(topic, part, offset);
} else {
preparer.addFromBeginning(topic, part);
}
kafkaCancelCallbackLatchMap.put(part, new CountDownLatch(1));
kafkaCancelMap.put(part, preparer.consume(
new KafkaMessageCallback(kafkaCancelCallbackLatchMap.get(part), messageProcessors, metricsContext)));
}
LOG.info("Consumer created for topic {}, partitions {}", topic, partitionOffset);
}
private long getLowestCheckpointOffset(int partition) {
long lowestCheckpoint = -1L;
for (KafkaLogProcessor processor : messageProcessors) {
Checkpoint checkpoint = processor.getCheckpoint(partition);
// If checkpoint offset is -1; then ignore the checkpoint offset
if (checkpoint.getNextOffset() != -1) {
lowestCheckpoint = (lowestCheckpoint == -1 || checkpoint.getNextOffset() < lowestCheckpoint) ?
checkpoint.getNextOffset() :
lowestCheckpoint;
}
}
return lowestCheckpoint;
}
private void waitForDatasetAvailability() throws InterruptedException {
boolean isDatasetAvailable = false;
while (!isDatasetAvailable) {
try {
for (KafkaLogProcessor processor : messageProcessors) {
processor.getCheckpoint(0);
}
isDatasetAvailable = true;
} catch (Exception e) {
LOG.warn(String.format("Cannot discover dataset service. Retry after %d seconds timeout.", TIMEOUT_SECONDS));
TimeUnit.SECONDS.sleep(TIMEOUT_SECONDS);
}
}
}
}