package com.caseystella.summarize;
import static org.junit.Assert.assertNotNull;

import com.caseystella.util.JSONUtils;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.collect.ImmutableList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Integration test for {@code Summarizer}: builds a small in-memory Spark
 * {@link DataFrame} of mixed-quality string columns (numbers, dates, blanks,
 * nulls, junk) and verifies a summary can be produced end-to-end.
 */
public class SummarizerIntegrationTest {
    // transient: keeps the test instance out of any serialized Spark closure
    // (Kryo is configured below), avoiding NotSerializableException-style issues.
    private transient JavaSparkContext sc;
    private transient SQLContext sqlContext;

    /** Spins up a local single-threaded Spark context with Kryo serialization. */
    @Before
    public void setup() {
        SparkConf conf = new SparkConf();
        conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        sc = new JavaSparkContext("local", "JavaAPISuite", conf);
        sqlContext = new SQLContext(sc);
    }

    /**
     * Builds a DataFrame from a list of JavaBeans.
     *
     * @param data  the rows to parallelize
     * @param clazz the bean class; its getters define the schema via reflection
     * @param <T>   bean type
     * @return a DataFrame whose columns mirror the bean's properties
     */
    public <T> DataFrame createDf(List<T> data, Class<T> clazz) {
        JavaRDD<T> rdd = sc.parallelize(data);
        return sqlContext.createDataFrame(rdd, clazz);
    }

    /**
     * A five-column JavaBean test fixture. The getter/setter pairs are required:
     * Spark's {@code createDataFrame(JavaRDD, Class)} derives the schema from
     * JavaBean properties, so this cannot be reduced to a record/plain fields.
     */
    public static class Row {
        String column1;
        String column2;
        String column3;
        String column4;
        String column5;

        public Row(String column1, String column2, String column3, String column4, String column5) {
            this.column1 = column1;
            this.column2 = column2;
            this.column3 = column3;
            this.column4 = column4;
            this.column5 = column5;
        }

        public String getColumn1() {
            return column1;
        }

        public void setColumn1(String column1) {
            this.column1 = column1;
        }

        public String getColumn2() {
            return column2;
        }

        public void setColumn2(String column2) {
            this.column2 = column2;
        }

        public String getColumn3() {
            return column3;
        }

        public void setColumn3(String column3) {
            this.column3 = column3;
        }

        public String getColumn4() {
            return column4;
        }

        public void setColumn4(String column4) {
            this.column4 = column4;
        }

        public String getColumn5() {
            return column5;
        }

        public void setColumn5(String column5) {
            this.column5 = column5;
        }

        @Override
        public String toString() {
            return "Row{" +
                    "column1='" + column1 + '\'' +
                    ", column2='" + column2 + '\'' +
                    ", column3='" + column3 + '\'' +
                    ", column4='" + column4 + '\'' +
                    ", column5='" + column5 + '\'' +
                    '}';
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            Row other = (Row) o;
            // Null-safe field-by-field comparison; equivalent to the hand-rolled
            // ternary chain but far easier to audit.
            return Objects.equals(column1, other.column1)
                    && Objects.equals(column2, other.column2)
                    && Objects.equals(column3, other.column3)
                    && Objects.equals(column4, other.column4)
                    && Objects.equals(column5, other.column5);
        }

        @Override
        public int hashCode() {
            // Consistent with equals: same fields, same null handling.
            return Objects.hash(column1, column2, column3, column4, column5);
        }
    }

    /**
     * End-to-end smoke test: summarizes a DataFrame containing numeric strings,
     * dates, empty strings, nulls, and non-numeric junk ("NaN", "pigeons"), then
     * prints the summary as JSON for manual inspection.
     */
    @Test
    public void test() throws JsonProcessingException {
        DataFrame df = createDf(ImmutableList.of(new Row("1", "1.5", "Dec 01, 1994", null, "foo")
                ,new Row("2", "-7.5", "20150204", "", null)
                ,new Row("3", "-2.5", "", "", null)
                ,new Row("NaN", "-2.5", "", "", null)
                ,new Row("pigeons", "-2.5", "", "", "several")
                ,new Row("null", "25", "", "", "foo")
                ,new Row("2.3", "-2.5", "", "", "casey")
                ,new Row("2.3", "-2", "", "", "foo")
                ,new Row("4", "-2", "", "", "several")
                ,new Row("4", "-2", "", "", "chicken")
                ,new Row("5", "-2", "", "", "chicken")
                ,new Row("5", "-2", "", "", "chicken")
                )
                , Row.class
        );
        TotalSummary summary = Summarizer.summarize(df, 5, 3, ImmutableList.of(10d, 25d, 50d, 75d, 95d, 99d), 5, 0.5, 2, 3 );
        // Previously this test had no assertion and would pass even if summarize
        // returned null; at minimum the summary must exist and serialize.
        assertNotNull("Summarizer.summarize returned null", summary);
        System.out.println(JSONUtils.INSTANCE.toJSON(summary, true));
    }

    /** Tears down the Spark context so repeated runs don't leak local contexts. */
    @After
    public void shutdown() {
        sc.stop();
    }
}