package com.xavient.dip.spark.twitter; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; import com.xavient.dip.spark.writer.SparkJdbcSourceWriter; import scala.Tuple2; public class TopNLocationByTweets extends TopN<String, Integer> { public TopNLocationByTweets(SparkJdbcSourceWriter rdbmsWriter,int topN) { super(rdbmsWriter,topN); this.tableName = "tweets_location"; this.schema = new StructType(new StructField[] { new StructField("location", DataTypes.StringType, false, null), new StructField("count", DataTypes.IntegerType, false, null) }); } @Override protected <T> JavaPairRDD<String, Integer> doMapToPair(JavaRDD<T> rdd) { return rdd.mapToPair(tweet -> { Object[] data = (Object[]) tweet; return new Tuple2<String, Integer>((String) data[8], 1); }).reduceByKey((val1, val2) -> val1 + val2); } }