package net.lenards;
import net.lenards.kinesis.KinesisCheckpointState;
import net.lenards.kinesis.types.*;
import net.lenards.types.EventRecord;
import java.io.Serializable;
import java.net.URL;
import java.net.URLClassLoader;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
import javax.xml.bind.DatatypeConverter;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.dstream.ReceiverInputDStream;
import org.apache.spark.streaming.receiver.Receiver;
import com.amazonaws.AmazonClientException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.regions.Region;
import com.amazonaws.regions.RegionUtils;
import com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException;
import com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException;
import com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException;
import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor;
import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer;
import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory;
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream;
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker;
import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason;
import com.amazonaws.services.kinesis.model.Record;
import static com.datastax.spark.connector.japi.CassandraJavaUtil.*;
/**
 * Spark map function that parses one delimited text record into an {@link EventRecord}.
 *
 * <p>Expected layout: {@code key0:key1|timestamp;field1;field2}, where {@code timestamp}
 * is an XML Schema {@code dateTime} lexical value and {@code field2} is a decimal integer.
 * Extra {@code :}- or {@code ;}-separated tokens beyond the ones used are ignored.
 *
 * <p>Any record that does not match this layout (null input, wrong number of
 * {@code |}-separated sections, too few keys or fields, unparseable timestamp or integer)
 * yields an empty {@code EventRecord} instead of throwing, so a single bad record does
 * not fail the task that is processing it.
 */
class EventRecordMapFunc implements Function<String, EventRecord>, Serializable {
    /** Number of '|'-separated sections a well-formed record has. */
    private static final int SECTION_COUNT = 2;
    /** Minimum number of ':'-separated keys read from the first section. */
    private static final int MIN_KEY_COUNT = 2;
    /** Minimum number of ';'-separated fields read from the second section. */
    private static final int MIN_FIELD_COUNT = 3;

    /**
     * Parses a single record.
     *
     * @param record raw text record; may be malformed or null
     * @return the populated record, or an empty {@code EventRecord} when the input does
     *         not match the expected layout
     */
    @Override
    public EventRecord call(String record) {
        if (record == null) {
            return new EventRecord();
        }

        String[] pieces = record.split("\\|");
        if (pieces.length != SECTION_COUNT) {
            // if the format is off, just give back an empty object
            return new EventRecord();
        }

        String[] keys = pieces[0].split(":");
        String[] fields = pieces[1].split(";");
        if (keys.length < MIN_KEY_COUNT || fields.length < MIN_FIELD_COUNT) {
            // Guard the fixed-index reads below against short sections.
            return new EventRecord();
        }

        final long ts;
        final Integer number;
        try {
            ts = DatatypeConverter.parseDateTime(fields[0]).getTimeInMillis();
            number = Integer.valueOf(fields[2]);
        } catch (IllegalArgumentException ignored) {
            // parseDateTime throws IllegalArgumentException and Integer.valueOf throws its
            // subclass NumberFormatException; both mean the record is malformed, which is
            // handled the same way as any other format problem.
            return new EventRecord();
        }

        return new EventRecord(keys[0], keys[1], ts, fields[1], number);
    }
}
/**
 * Command-line driver for a Spark Streaming job: it attaches a {@code JKinesisReceiver}
 * to a streaming context, maps each received string to an {@link EventRecord}, prints
 * the resulting stream, and saves it to the Cassandra table {@code canary.eventrecord}.
 */
public class Consumer implements Serializable {
    private String appName;
    private String streamName;
    private String endpointUrl;
    private String regionName;
    // Used both as the streaming context's batch duration and as the interval handed to
    // the receiver.
    private Duration checkpointInterval;
    private SparkConf conf;

    /**
     * Stores the connection settings and prepares the Spark configuration.
     *
     * @param appName     application name; also used as the Spark app name
     * @param streamName  stream name passed to the receiver
     * @param endpointUrl endpoint URL passed to the receiver
     * @param regionName  region name passed to the receiver
     */
    public Consumer(String appName, String streamName, String endpointUrl,
                    String regionName) {
        this.appName = appName;
        this.streamName = streamName;
        this.endpointUrl = endpointUrl;
        this.regionName = regionName;
        this.checkpointInterval = new Duration(EventRecordProcessor.DEFAULT_INTERVAL_IN_MS);
        this.conf = buildSparkConf(appName);
    }

    /** Builds the Spark configuration: local Cassandra host, local[3] master, given app name. */
    private static SparkConf buildSparkConf(String name) {
        SparkConf sparkConf = new SparkConf(true);
        sparkConf.set("spark.cassandra.connection.host", "127.0.0.1");
        sparkConf.setMaster("local[3]");
        sparkConf.setAppName(name);
        return sparkConf;
    }

    /**
     * Wires up the receiver, the mapping step and the Cassandra sink, then starts the
     * streaming context and blocks until it terminates.
     */
    public void start() {
        final JavaStreamingContext context = new JavaStreamingContext(conf, checkpointInterval);

        // Register a JVM shutdown hook so the context is stopped when the process exits.
        Runnable stopStreaming = new Runnable() {
            @Override
            public void run() {
                System.out.println("Shutting down streaming app...");
                context.stop(true, true);
                System.out.println("Shutdown of streaming app complete.");
            }
        };
        Runtime.getRuntime().addShutdownHook(new Thread(stopStreaming));

        JKinesisReceiver receiver = new JKinesisReceiver(
                appName, streamName, endpointUrl, regionName,
                checkpointInterval, InitialPositionInStream.LATEST);

        // Raw strings from the receiver -> parsed EventRecord objects.
        JavaDStream<EventRecord> recs =
                context.receiverStream(receiver).map(new EventRecordMapFunc());
        recs.print();

        // Persist the DStream to Cassandra.
        javaFunctions(recs)
                .writerBuilder("canary", "eventrecord", mapToRow(EventRecord.class))
                .saveToCassandra();

        System.out.println("Start Spark Stream Processing...");
        context.start();
        context.awaitTermination();
    }

    /**
     * Echoes the command-line arguments and exits the JVM with status 1, after printing
     * usage, unless exactly four arguments were supplied.
     *
     * @param args raw command-line arguments
     */
    public static void verify(String[] args) {
        System.out.println("Command Line Arguments: " + Arrays.asList(args));
        if (args.length == 4) {
            return;
        }
        System.out.println(
                "Usage: \n\tConsumer <app-name> <stream-name> <endpoint-url> <aws-region>");
        System.exit(1);
    }

    /**
     * Entry point: validates the arguments, then builds and starts a {@code Consumer}.
     *
     * @param args app name, stream name, endpoint URL and AWS region, in that order
     * @throws Exception declared by the original signature
     */
    public static void main(String[] args) throws Exception {
        verify(args);
        new Consumer(args[0], args[1], args[2], args[3]).start();
    }
}