package solution; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.DataFrame; import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; public class PlayAnalyzer { public static void main(String[] args) { SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); JavaSparkContext sc = new JavaSparkContext("local", "JavaAPISuite", conf); SQLContext sqlContext = new SQLContext(sc); // Creates a DataFrame from a specified file DataFrame plays = sqlContext.load("output", "com.databricks.spark.avro"); // Apply the schema to the RDD. sqlContext.registerDataFrameAsTable(plays, "playbyplay"); // Run the query DataFrame join = sqlContext .sql("select playtype, pertotalstable.totalperplay, totalstable.total, ((pertotalstable.totalperplay / totalstable.total) * 100) as percentage from " + "(select playtype, count(*) as totalperplay from playbyplay where rooftype <> \"None\" and prcp <= 0 group by playtype) pertotalstable " + "full outer join " + "(select count(*) as total from playbyplay where rooftype <> \"None\" and prcp <= 0) totalstable " + "order by playtype"); // Output the query's rows join.javaRDD().collect().forEach((Row row) -> { System.out.println("Result:" + row.toString()); }); } }