package com.xavient.dip.spark.twitter;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.streaming.api.java.JavaDStream;
import com.xavient.dip.spark.writer.SparkJdbcSourceWriter;
import scala.Tuple2;
/**
 * Template for a per-batch "top N" computation over a Spark stream.
 *
 * <p>For every RDD of the stream passed to {@link #compute(JavaDStream)}, the RDD is turned
 * into key/value pairs by {@link #doMapToPair(JavaRDD)}, the {@link #topN} pairs that rank
 * highest by value are taken, each pair is converted to a {@link Row} by
 * {@link #createRow(Tuple2)}, and the rows are handed to the {@link SparkJdbcSourceWriter}
 * together with {@link #schema} and {@link #tableName}.
 *
 * <p>This class never assigns {@link #schema} or {@link #tableName}; they must be set
 * elsewhere (presumably by subclasses) before a batch is processed.
 *
 * @param <K> key type of the ranked pairs
 * @param <V> value type of the ranked pairs; its natural ordering decides the ranking
 */
public abstract class TopN<K, V extends Comparable<V>> {

    /** Number of pairs requested from each batch. */
    protected int topN;

    /** Table name passed to the writer; not assigned by this class. */
    protected String tableName;

    /** Row schema passed to the writer; not assigned by this class. */
    protected StructType schema;

    /** Writer that receives the rows produced for each batch. */
    protected SparkJdbcSourceWriter rdbmsWriter;

    /**
     * Stores the writer and the number of pairs to keep per batch.
     *
     * @param rdbmsWriter writer that receives the rows of every batch
     * @param topN number of pairs requested from each batch
     */
    public TopN(SparkJdbcSourceWriter rdbmsWriter, int topN) {
        this.topN = topN;
        this.rdbmsWriter = rdbmsWriter;
    }

    /**
     * Registers the top-N computation on the given stream.
     *
     * <p>The writer is invoked once per RDD, including when the resulting row list is empty.
     *
     * @param twitterStream stream whose RDDs are ranked
     * @param <T> element type of the stream
     */
    public <T> void compute(JavaDStream<T> twitterStream) {
        twitterStream.foreachRDD(rdd -> {
            // The intersection cast makes the lambda serializable, which is needed because the
            // comparator is handed to Spark's top(). It deliberately does not touch `this`.
            // Values are read through the _2() accessor, the access form documented for Java;
            // the raw field may be unpopulated on Scala's primitive-specialised tuple classes.
            Comparator<Tuple2<K, V>> byValue =
                    (Comparator<Tuple2<K, V>> & Serializable)
                            (left, right) -> left._2().compareTo(right._2());

            List<Tuple2<K, V>> best = doMapToPair(rdd).top(topN, byValue);

            // Build the row list directly instead of through a side-effecting forEach lambda.
            List<Row> rows = new ArrayList<>(best.size());
            for (Tuple2<K, V> pair : best) {
                rows.add(createRow(pair));
            }
            rdbmsWriter.write(rows, schema, tableName);
        });
    }

    /**
     * Converts one ranked pair into a two-column row: key first, value second.
     *
     * @param record pair to convert
     * @return row holding the pair's key and value
     */
    protected Row createRow(Tuple2<K, V> record) {
        return RowFactory.create(record._1(), record._2());
    }

    /**
     * Maps the elements of one batch to the key/value pairs that are ranked.
     *
     * @param rdd RDD of the current batch
     * @param <T> element type of the RDD
     * @return pair RDD whose values determine the ranking
     */
    protected abstract <T> JavaPairRDD<K, V> doMapToPair(JavaRDD<T> rdd);
}