/*
* Seldon -- open source prediction engine
* =======================================
* Copyright 2011-2015 Seldon Technologies Ltd and Rummble Ltd (http://www.seldon.io/)
*
**********************************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**********************************************************************************************
*/
package io.seldon.stream.analytics;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import net.sourceforge.argparse4j.ArgumentParsers;
import net.sourceforge.argparse4j.inf.ArgumentParser;
import net.sourceforge.argparse4j.inf.ArgumentParserException;
import net.sourceforge.argparse4j.inf.Namespace;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.connect.json.JsonDeserializer;
import org.apache.kafka.connect.json.JsonSerializer;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.ForeachAction;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KStreamBuilder;
import org.apache.kafka.streams.kstream.KeyValueMapper;
import org.apache.kafka.streams.kstream.Predicate;
import org.apache.kafka.streams.kstream.Reducer;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.kstream.Windowed;
import org.apache.kafka.streams.processor.WallclockTimestampExtractor;
import org.influxdb.InfluxDB;
import org.influxdb.InfluxDBFactory;
import org.influxdb.dto.Point;
import com.fasterxml.jackson.databind.JsonNode;
public class ImpressionsToInfluxDb {
@SuppressWarnings("unchecked")
public static void process(final Namespace ns) throws InterruptedException
{
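        // Kafka Streams configuration: WallclockTimestampExtractor windows records by
        // processing time (wall clock) rather than by event time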
Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-impressions");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, ns.getString("kafka"));
props.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, ns.getString("zookeeper"));
props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.put(StreamsConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, WallclockTimestampExtractor.class.getName());
        // Reset offsets to earliest so the demo can be re-run against the same pre-loaded data
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
final InfluxDB influxDB = InfluxDBFactory.connect("http://"+ns.getString("influxdb"), ns.getString("influx_user"), ns.getString("influx_password"));
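        // Batch InfluxDB writes: flush after 50 buffered points or every 5 seconds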
influxDB.enableBatch(50, 5, TimeUnit.SECONDS);
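        // Serdes: the topic is consumed as raw JSON; Impression and Request get typed
        // JSON serdes so the windowed reduces can (de)serialize aggregated values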
KStreamBuilder builder = new KStreamBuilder();
JsonDeserializer jsonDeserializer = new JsonDeserializer();
final Serde<JsonNode> jsonSerde = Serdes.serdeFrom(new JsonSerializer(),jsonDeserializer);
final Serde<String> stringSerde = Serdes.String();
io.seldon.stream.serializer.JsonSerializer<Impression> impressionJsonSerializer = new io.seldon.stream.serializer.JsonSerializer<>();
io.seldon.stream.serializer.JsonDeserializer<Impression> impressionJsonDeserializer = new io.seldon.stream.serializer.JsonDeserializer<>(Impression.class);
Serde<Impression> impressionSerde = Serdes.serdeFrom(impressionJsonSerializer,impressionJsonDeserializer);
io.seldon.stream.serializer.JsonSerializer<Request> requestJsonSerializer = new io.seldon.stream.serializer.JsonSerializer<>();
io.seldon.stream.serializer.JsonDeserializer<Request> requestJsonDeserializer = new io.seldon.stream.serializer.JsonDeserializer<>(Request.class);
Serde<Request> requestSerde = Serdes.serdeFrom(requestJsonSerializer,requestJsonDeserializer);
System.out.println("Topic is "+ns.getString("topic"));
KStream<String, JsonNode> source = builder.stream(stringSerde,jsonSerde,ns.getString("topic"));
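        // branch() routes each record to the first predicate it matches; records
        // matching neither tag are dropped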
        KStream<String,JsonNode>[] branches = source.branch(
                new Predicate<String, JsonNode>()
                {
                    @Override
                    public boolean test(String key, JsonNode value)
                    {
                        // Impression events carry the tag "restapi.ctralg"
                        JsonNode tag = value.get("tag");
                        return tag != null && tag.asText().equals("restapi.ctralg");
                    }
                },
                new Predicate<String, JsonNode>()
                {
                    @Override
                    public boolean test(String key, JsonNode value)
                    {
                        // Request events carry the tag "restapi.calls"
                        JsonNode tag = value.get("tag");
                        return tag != null && tag.asText().equals("restapi.calls");
                    }
                }
                );
KStream<String,JsonNode> impressionsStream = branches[0];
KStream<String,JsonNode> requestsStream = branches[1];
/*
* Impressions topology
*/
impressionsStream.map(new KeyValueMapper<String, JsonNode, KeyValue<String,Impression>>() {
@Override
public KeyValue<String, Impression> apply(String key, JsonNode value) {
                //Nasty hack: the random key suffix keeps records from being merged, so each
                //one passes through the windowed reduce and is written to InfluxDB exactly once.
                //Remove once there is a correct way to emit only final per-second aggregations.
                Random r = new Random();
                Impression imp = new Impression(value);
                String ikey = imp.consumer+"_"+imp.rectag+"_"+imp.variation+"_"+imp.time+"_"+r.nextInt();
return new KeyValue<String,Impression>(ikey,imp);
}
})
.reduceByKey(new Reducer<Impression>() {
@Override
public Impression apply(Impression value1, Impression value2) {
return value1.add(value2);
}
}, TimeWindows.of("ImpressionWindow", 5000L),stringSerde, impressionSerde)
.foreach(
new ForeachAction<Windowed<String>, Impression>() {
@Override
public void apply(Windowed<String> key, Impression value) {
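                        // Convert epoch seconds to microseconds and add sub-second jitter so
                        // points sharing a tag set don't overwrite each other on an identical timestamp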
Random r = new Random();
long time = value.time * 1000000;
time = time + r.nextInt(1000000);
Point point = Point.measurement(ns.getString("influx_measurement_impressions"))
.time(time, TimeUnit.MICROSECONDS)
.tag("client", value.consumer)
.tag("rectag", value.rectag)
.tag("variation", value.variation)
.addField("impressions", value.imp)
.addField("clicks", value.click)
.build();
//System.out.println(key.key()+"Window "+key.window().start()+" to "+key.window().end()+"Value is "+value.toString());
influxDB.write(ns.getString("influx_database"), "default", point);
}
});
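        /*
         * Requests topology
         */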
requestsStream.map(new KeyValueMapper<String, JsonNode, KeyValue<String,Request>>() {
@Override
public KeyValue<String, Request> apply(String key, JsonNode value) {
                //Nasty hack: the random key suffix keeps records from being merged, so each
                //one passes through the windowed reduce and is written to InfluxDB exactly once.
                //Remove once there is a correct way to emit only final per-second aggregations.
                Random r = new Random();
                Request req = new Request(value);
                String rkey = req.consumer+"_"+req.path+"_"+req.httpmethod+"_"+req.time+"_"+r.nextInt();
return new KeyValue<String,Request>(rkey,req);
}
})
.reduceByKey(new Reducer<Request>() {
@Override
public Request apply(Request value1, Request value2) {
return value1.add(value2);
}
}, TimeWindows.of("RequestWindow", 5000L),stringSerde, requestSerde)
.foreach(new ForeachAction<Windowed<String>, Request>() {
@Override
public void apply(Windowed<String> key, Request value) {
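                        // Convert epoch seconds to microseconds and add sub-second jitter so
                        // points sharing a tag set don't overwrite each other on an identical timestamp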
Random r = new Random();
long time = value.time * 1000000;
time = time + r.nextInt(1000000);
Point point = Point.measurement(ns.getString("influx_measurement_requests"))
.time(time, TimeUnit.MICROSECONDS)
.tag("client", value.consumer)
.tag("path", value.path)
.tag("httpmethod", value.httpmethod)
.addField("count", value.count)
.addField("exectime", value.exectime/((float)value.count))
.build();
//System.out.println("Value is "+value.toString());
influxDB.write(ns.getString("influx_database"), "default", point);
}
});
        final KafkaStreams streams = new KafkaStreams(builder, props);
streams.start();
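        // Close the topology cleanly when the JVM shuts down
        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
            @Override
            public void run() {
                streams.close();
            }
        }));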
}
public static void main(String[] args) throws Exception {
ArgumentParser parser = ArgumentParsers.newArgumentParser("ImpressionsToInfluxDb")
.defaultHelp(true)
.description("Read Seldon impressions and send stats to influx db");
parser.addArgument("-t", "--topic").setDefault("impressions").help("Kafka topic to read from");
parser.addArgument("-k", "--kafka").setDefault("localhost:9092").help("Kafka server and port");
parser.addArgument("-z", "--zookeeper").setDefault("localhost:2181").help("Zookeeper server and port");
parser.addArgument("-i", "--influxdb").setDefault("localhost:8086").help("Influxdb server and port");
parser.addArgument("-u", "--influx-user").setDefault("root").help("Influxdb user");
parser.addArgument("-p", "--influx-password").setDefault("root").help("Influxdb password");
parser.addArgument("-d", "--influx-database").setDefault("seldon").help("Influxdb database");
parser.addArgument("--influx-measurement-impressions").setDefault("impressions").help("Influxdb impressions measurement");
parser.addArgument("--influx-measurement-requests").setDefault("requests").help("Influxdb requests measurement");
Namespace ns = null;
try {
ns = parser.parseArgs(args);
ImpressionsToInfluxDb.process(ns);
} catch (ArgumentParserException e) {
parser.handleError(e);
System.exit(1);
}
}
}