package mil.nga.giat.geowave.core.ingest.kafka;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.ConsumerTimeoutException;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.ingest.GeoWaveData;
import mil.nga.giat.geowave.core.ingest.IngestPluginBase;
import mil.nga.giat.geowave.core.ingest.avro.AvroFormatPlugin;
import mil.nga.giat.geowave.core.ingest.avro.GenericAvroSerializer;
import mil.nga.giat.geowave.core.ingest.index.IndexProvider;
import mil.nga.giat.geowave.core.store.CloseableIterator;
import mil.nga.giat.geowave.core.store.DataStore;
import mil.nga.giat.geowave.core.store.IndexWriter;
import mil.nga.giat.geowave.core.store.adapter.WritableDataAdapter;
import mil.nga.giat.geowave.core.store.index.PrimaryIndex;
import mil.nga.giat.geowave.core.store.operations.remote.options.DataStorePluginOptions;
import mil.nga.giat.geowave.core.store.operations.remote.options.IndexPluginOptions;
import mil.nga.giat.geowave.core.store.operations.remote.options.VisibilityOptions;
/**
 * This class executes the ingestion of intermediate Avro-encoded data from
 * Kafka topics into GeoWave. One consumer is launched per registered format
 * plugin; each reads from the topic whose name matches the plugin's format
 * name.
*
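 * <p>
 * A minimal usage sketch (the option objects are assumed to be populated
 * elsewhere, e.g. by command-line parsing):
 *
 * <pre>
 * final IngestFromKafkaDriver driver = new IngestFromKafkaDriver(
 * 		storeOptions,
 * 		indexOptions,
 * 		ingestPlugins,
 * 		kafkaOptions,
 * 		ingestOptions);
 * if (driver.runOperation()) {
 * 	// blocks until every topic consumer has finished
 * 	driver.waitFutures();
 * }
 * </pre>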
*/
public class IngestFromKafkaDriver
{
	private static final Logger LOGGER = LoggerFactory.getLogger(IngestFromKafkaDriver.class);
private final DataStorePluginOptions storeOptions;
private final List<IndexPluginOptions> indexOptions;
private final Map<String, AvroFormatPlugin<?, ?>> ingestPlugins;
private final KafkaConsumerCommandLineOptions kafkaOptions;
private final VisibilityOptions ingestOptions;
private final List<Future<?>> futures = new ArrayList<Future<?>>();
	public IngestFromKafkaDriver(
			final DataStorePluginOptions storeOptions,
			final List<IndexPluginOptions> indexOptions,
			final Map<String, AvroFormatPlugin<?, ?>> ingestPlugins,
			final KafkaConsumerCommandLineOptions kafkaOptions,
			final VisibilityOptions ingestOptions ) {
this.storeOptions = storeOptions;
this.indexOptions = indexOptions;
this.ingestPlugins = ingestPlugins;
this.kafkaOptions = kafkaOptions;
this.ingestOptions = ingestOptions;
}
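	/**
	 * Creates the data store, launches a Kafka consumer for each format
	 * plugin, and waits up to 30 seconds for all consumers to begin
	 * listening on their topics.
	 *
	 * @return {@code true} if every consumer started in time, {@code false}
	 *         otherwise
	 */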
public boolean runOperation() {
final DataStore dataStore = storeOptions.createDataStore();
		// drained concurrently by the consumer threads as they come online, so
		// the list must tolerate concurrent removal and iteration
		final List<String> queue = new CopyOnWriteArrayList<String>();
addPluginsToQueue(
ingestPlugins,
queue);
configureAndLaunchPlugins(
dataStore,
ingestPlugins,
queue);
		// poll once per second, giving the consumers up to 30 seconds to come
		// online before giving up
		int counter = 0;
		while (!queue.isEmpty()) {
			if (counter > 30) {
for (final String pluginFormatName : queue) {
LOGGER.error("Unable to start up Kafka consumer for plugin [" + pluginFormatName + "]");
}
break;
}
try {
Thread.sleep(1000);
}
			catch (final InterruptedException e) {
				LOGGER.error(
						"Thread interrupted",
						e);
				// restore the interrupt flag so callers can see the interruption
				Thread.currentThread().interrupt();
			}
counter++;
}
		if (queue.isEmpty()) {
LOGGER.info("All format plugins are now listening on Kafka topics");
}
else {
LOGGER.warn("Unable to setup Kafka consumers for the following format plugins:");
for (final String formatPluginName : queue) {
LOGGER.warn("\t[" + formatPluginName + "]");
}
return false;
}
return true;
}
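	/**
	 * Seeds the startup queue with every format plugin name; each consumer
	 * removes its entry once it is listening on its topic.
	 */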
private void addPluginsToQueue(
final Map<String, AvroFormatPlugin<?, ?>> pluginProviders,
final List<String> queue ) {
queue.addAll(pluginProviders.keySet());
}
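	/**
	 * For each format plugin, gathers its data adapters and submits a
	 * consumer task for the plugin's topic; plugins that do not support
	 * ingest from Kafka are logged and skipped.
	 */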
private void configureAndLaunchPlugins(
final DataStore dataStore,
final Map<String, AvroFormatPlugin<?, ?>> pluginProviders,
final List<String> queue ) {
try {
			for (final Entry<String, AvroFormatPlugin<?, ?>> pluginProvider : pluginProviders.entrySet()) {
final List<WritableDataAdapter<?>> adapters = new ArrayList<WritableDataAdapter<?>>();
AvroFormatPlugin<?, ?> avroFormatPlugin = null;
try {
avroFormatPlugin = pluginProvider.getValue();
final IngestPluginBase<?, ?> ingestWithAvroPlugin = avroFormatPlugin.getIngestWithAvroPlugin();
final WritableDataAdapter<?>[] dataAdapters = ingestWithAvroPlugin.getDataAdapters(ingestOptions
.getVisibility());
adapters.addAll(Arrays.asList(dataAdapters));
final KafkaIngestRunData runData = new KafkaIngestRunData(
adapters,
dataStore);
futures.add(launchTopicConsumer(
pluginProvider.getKey(),
avroFormatPlugin,
runData,
queue));
}
				catch (final UnsupportedOperationException e) {
					LOGGER.warn(
							"Plugin provider '" + pluginProvider.getKey() + "' does not support ingest from Kafka",
							e);
				}
}
}
catch (final Exception e) {
LOGGER.warn(
"Error in accessing Kafka stream",
e);
}
}
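	/**
	 * Builds a high-level Kafka consumer connector from the configured
	 * consumer properties.
	 */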
	private ConsumerConnector buildKafkaConsumer() {
		final Properties kafkaProperties = kafkaOptions.getProperties();
		return Consumer.createJavaConsumerConnector(new ConsumerConfig(
				kafkaProperties));
	}
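	/**
	 * Submits a task that consumes the plugin's topic on a dedicated thread
	 * and returns a future that completes when consumption ends.
	 */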
private Future<?> launchTopicConsumer(
final String formatPluginName,
final AvroFormatPlugin<?, ?> avroFormatPlugin,
final KafkaIngestRunData ingestRunData,
final List<String> queue )
throws IllegalArgumentException {
		// one dedicated thread per topic consumer; sizing a shared pool by
		// queue.size() would over-allocate threads as the queue drains
		final ExecutorService executorService = Executors.newSingleThreadExecutor();
		final Future<?> future = executorService.submit(new Runnable() {
@Override
public void run() {
try {
consumeFromTopic(
formatPluginName,
avroFormatPlugin,
ingestRunData,
queue);
}
catch (final Exception e) {
LOGGER.error(
"Error consuming from Kafka topic [" + formatPluginName + "]",
e);
}
}
		});
		// no further tasks are submitted; let the running task finish and then
		// release the executor's thread so the JVM can exit cleanly
		executorService.shutdown();
		return future;
}
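	/**
	 * Opens a message stream for the plugin's topic (the topic name matches
	 * the format plugin name), removes the plugin from the startup queue,
	 * and consumes the stream until it ends; the consumer connector is
	 * always shut down on exit.
	 */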
public <T> void consumeFromTopic(
final String formatPluginName,
final AvroFormatPlugin<T, ?> avroFormatPlugin,
final KafkaIngestRunData ingestRunData,
final List<String> queue ) {
final ConsumerConnector consumer = buildKafkaConsumer();
if (consumer == null) {
throw new RuntimeException(
"Kafka consumer connector is null, unable to create message streams");
}
try {
LOGGER.debug("Kafka consumer setup for format [" + formatPluginName + "] against topic ["
+ formatPluginName + "]");
final Map<String, Integer> topicCount = new HashMap<>();
topicCount.put(
formatPluginName,
1);
final Map<String, List<KafkaStream<byte[], byte[]>>> consumerStreams = consumer
.createMessageStreams(topicCount);
final List<KafkaStream<byte[], byte[]>> streams = consumerStreams.get(formatPluginName);
queue.remove(formatPluginName);
consumeMessages(
formatPluginName,
avroFormatPlugin,
ingestRunData,
streams.get(0));
}
finally {
consumer.shutdown();
}
}
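	/**
	 * Reads messages from the stream, deserializes each one with the
	 * plugin's Avro schema, and ingests it, flushing after each full batch
	 * and on consumer timeout; if flush-and-reconnect is enabled,
	 * consumption resumes after a timeout.
	 */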
protected <T> void consumeMessages(
final String formatPluginName,
final AvroFormatPlugin<T, ?> avroFormatPlugin,
final KafkaIngestRunData ingestRunData,
final KafkaStream<byte[], byte[]> stream ) {
		int currentBatchId = 0;
		final int batchSize = kafkaOptions.getBatchSize();
		// loop (rather than recurse) on reconnect so that a long-lived consumer
		// cannot grow the call stack without bound
		boolean reconnect = true;
		while (reconnect) {
			reconnect = false;
			try {
				final ConsumerIterator<byte[], byte[]> messageIterator = stream.iterator();
				while (messageIterator.hasNext()) {
					final byte[] msg = messageIterator.next().message();
					LOGGER.info("[" + formatPluginName + "] message received");
					final T dataRecord = GenericAvroSerializer.deserialize(
							msg,
							avroFormatPlugin.getAvroSchema());
					if (dataRecord != null) {
						try {
							processMessage(
									dataRecord,
									ingestRunData,
									avroFormatPlugin);
							// flush once a full batch has been ingested
							if (++currentBatchId >= batchSize) {
								if (LOGGER.isDebugEnabled()) {
									LOGGER.debug(String.format(
											"Flushing %d items",
											currentBatchId));
								}
								ingestRunData.flush();
								currentBatchId = 0;
							}
						}
						catch (final Exception e) {
							LOGGER.error(
									"Error processing message: " + e.getMessage(),
									e);
						}
					}
				}
			}
			catch (final ConsumerTimeoutException te) {
				// flush any outstanding items
				if (currentBatchId > 0) {
					if (LOGGER.isDebugEnabled()) {
						LOGGER.debug(String.format(
								"Flushing %d items",
								currentBatchId));
					}
					ingestRunData.flush();
					currentBatchId = 0;
				}
				if (kafkaOptions.isFlushAndReconnect()) {
					LOGGER.info(
							"Consumer timed out from Kafka topic [" + formatPluginName + "]... Reconnecting...",
							te);
					reconnect = true;
				}
				else {
					LOGGER.info(
							"Consumer timed out from Kafka topic [" + formatPluginName + "]... ",
							te);
				}
			}
			catch (final Exception e) {
				LOGGER.warn(
						"Consuming from Kafka topic [" + formatPluginName + "] was interrupted... ",
						e);
			}
		}
}
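	/**
	 * Converts one deserialized record into GeoWave data entries and writes
	 * each entry through an index writer, creating writers per adapter on
	 * demand.
	 */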
	protected synchronized <T> void processMessage(
			final T dataRecord,
			final KafkaIngestRunData ingestRunData,
			final AvroFormatPlugin<T, ?> plugin )
			throws IOException {
		final IngestPluginBase<T, ?> ingestPlugin = plugin.getIngestWithAvroPlugin();
		final IndexProvider indexProvider = plugin;
final Map<ByteArrayId, IndexWriter> writerMap = new HashMap<ByteArrayId, IndexWriter>();
final Map<ByteArrayId, PrimaryIndex> indexMap = new HashMap<ByteArrayId, PrimaryIndex>();
		for (final IndexPluginOptions indexOption : indexOptions) {
			final PrimaryIndex primaryIndex = indexOption.createPrimaryIndex();
			if (primaryIndex == null) {
				LOGGER.error("Could not get index instance, getIndex() returned null");
				throw new IOException(
						"Could not get index instance, getIndex() returned null");
}
indexMap.put(
primaryIndex.getId(),
primaryIndex);
}
final PrimaryIndex[] requiredIndices = indexProvider.getRequiredIndices();
if ((requiredIndices != null) && (requiredIndices.length > 0)) {
for (final PrimaryIndex requiredIndex : requiredIndices) {
indexMap.put(
requiredIndex.getId(),
requiredIndex);
}
}
try (CloseableIterator<?> geowaveDataIt = ingestPlugin.toGeoWaveData(
dataRecord,
indexMap.keySet(),
ingestOptions.getVisibility())) {
while (geowaveDataIt.hasNext()) {
final GeoWaveData<?> geowaveData = (GeoWaveData<?>) geowaveDataIt.next();
final WritableDataAdapter adapter = ingestRunData.getDataAdapter(geowaveData);
if (adapter == null) {
LOGGER.warn("Adapter not found for " + geowaveData.getValue());
continue;
}
IndexWriter indexWriter = writerMap.get(adapter.getAdapterId());
if (indexWriter == null) {
List<PrimaryIndex> indexList = new ArrayList<PrimaryIndex>();
for (final ByteArrayId indexId : geowaveData.getIndexIds()) {
final PrimaryIndex index = indexMap.get(indexId);
if (index == null) {
LOGGER.warn("Index '" + indexId.getString() + "' not found for " + geowaveData.getValue());
continue;
}
indexList.add(index);
}
indexWriter = ingestRunData.getIndexWriter(
adapter,
indexList.toArray(new PrimaryIndex[indexList.size()]));
writerMap.put(
adapter.getAdapterId(),
indexWriter);
}
indexWriter.write(geowaveData.getValue());
}
}
}
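	/**
	 * @return the futures of the launched topic consumers
	 */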
public List<Future<?>> getFutures() {
return futures;
}
	/**
	 * Only true if all consumer futures are complete.
	 *
	 * @return {@code true} if every future has finished, {@code false}
	 *         otherwise
	 */
public boolean isComplete() {
for (Future<?> future : futures) {
if (!future.isDone()) {
return false;
}
}
return true;
}
	/**
	 * Waits for all Kafka topic consumers to complete, then returns their
	 * result objects.
	 *
	 * @return the results of the completed consumer futures
	 * @throws InterruptedException
	 *             if interrupted while waiting on a future
	 * @throws ExecutionException
	 *             if a consumer task threw an exception
	 */
public List<Object> waitFutures()
throws InterruptedException,
ExecutionException {
List<Object> results = new ArrayList<Object>();
for (Future<?> future : futures) {
results.add(future.get());
}
return results;
}
}