package com.xavient.dip.spark.twitter; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; import com.xavient.dip.spark.writer.SparkJdbcSourceWriter; import scala.Tuple2; public class TopNUsersWithMaxFollowers extends TopN<String, Integer> { public TopNUsersWithMaxFollowers(SparkJdbcSourceWriter rdbmsWriter,int topN) { super(rdbmsWriter,topN); this.tableName = "user_followers"; this.schema = new StructType(new StructField[] { new StructField("username", DataTypes.StringType, false, null), new StructField("count", DataTypes.IntegerType, false, null) }); } @Override protected <T> JavaPairRDD<String, Integer> doMapToPair(JavaRDD<T> rdd) { return rdd.mapToPair(tweet -> { Object[] data = (Object[]) tweet; return new Tuple2<String, Integer>((String) data[5], (Integer) data[15]); }); } }