package solution;
import java.io.BufferedReader;
import java.io.FileReader;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.storage.StorageLevel;
/**
 * Loads the Avro data set in {@code output} as a Spark SQL table named
 * {@code playbyplay} and runs every statement found in an HQL script against it,
 * printing each result row to standard output.
 *
 * <p>The script is read line by line:
 * <ul>
 *   <li>lines starting with {@code "set "} are skipped;</li>
 *   <li>lines starting with {@code "! echo "} have their quoted text printed;</li>
 *   <li>all other lines are accumulated until a line ending in {@code ;} completes
 *       the statement, which is then executed.</li>
 * </ul>
 */
public class AllQueryAnalyzer {

    /** Path of the HQL script, relative to the working directory. */
    private static final String QUERY_FILE = "../queries.hql";

    /** Prefix of script lines that are skipped rather than executed. */
    private static final String SET_PREFIX = "set ";

    /** Prefix of script lines whose quoted argument is printed. */
    private static final String ECHO_PREFIX = "! echo ";

    /**
     * Program entry point: sets up a local Spark context, registers the data set
     * as a table and executes the query script.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf();
        conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        JavaSparkContext sc = new JavaSparkContext("local", "JavaAPISuite", conf);
        try {
            SQLContext sqlContext = new SQLContext(sc);

            // Creates a DataFrame from the Avro files in the "output" directory
            DataFrame plays = sqlContext.load("output", "com.databricks.spark.avro");

            // Keep the DataFrame in memory; every query below reads from it
            plays.persist(StorageLevel.MEMORY_ONLY());

            // Make the DataFrame addressable from SQL under the name "playbyplay"
            sqlContext.registerDataFrameAsTable(plays, "playbyplay");

            runQueryFile(sqlContext, QUERY_FILE);
        } finally {
            // Always release the Spark context, even when loading or a query fails
            sc.stop();
        }
    }

    /**
     * Reads the script at {@code path} and executes each {@code ;}-terminated
     * statement in order. Any failure while reading or executing is reported via
     * its stack trace and stops processing of the remaining script.
     *
     * @param sqlContext context used to execute the statements
     * @param path       location of the HQL script
     */
    private static void runQueryFile(SQLContext sqlContext, String path) {
        StringBuilder query = new StringBuilder();
        // try-with-resources closes the reader even when a query throws
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (line.startsWith(SET_PREFIX)) {
                    continue;
                } else if (line.startsWith(ECHO_PREFIX)) {
                    // Print the text between the first pair of double quotes; fall
                    // back to the raw argument when the line carries no quotes
                    String[] parts = line.split("\"");
                    String toOutput = parts.length > 1
                            ? parts[1]
                            : line.substring(ECHO_PREFIX.length());
                    System.out.println(toOutput);
                } else if (trimmed.endsWith(";")) {
                    // Found the end of the query. Append the line without its
                    // trailing ';' (Spark doesn't handle it); working on the trimmed
                    // text ensures the ';' itself is dropped, not trailing whitespace.
                    query.append(trimmed, 0, trimmed.length() - 1);
                    String sql = query.toString();
                    // Reset the buffer for the next statement
                    query.setLength(0);
                    // A lone ';' leaves nothing to execute
                    if (!sql.trim().isEmpty()) {
                        runSQL(sqlContext, sql);
                    }
                } else {
                    // Keep the line break so tokens on adjacent lines stay separated
                    query.append(line).append('\n');
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Executes a single SQL statement and prints every resulting row, each
     * prefixed with {@code "Result:"}.
     *
     * @param sqlContext context used to execute the statement
     * @param query      SQL text without a trailing semicolon
     */
    private static void runSQL(SQLContext sqlContext, String query) {
        // Run the query
        DataFrame df = sqlContext.sql(query);

        // Bring the rows to the driver and print them
        df.javaRDD().collect().forEach((Row row) -> {
            System.out.println("Result:" + row.toString());
        });
    }
}