package edu.isi.karma.spark;
import java.io.IOException;
import java.net.URL;
import java.util.Properties;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import com.holdenkarau.spark.testing.JavaRDDComparisons;
import com.holdenkarau.spark.testing.SharedJavaSparkContext;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.junit.Test;
import scala.Option;
import scala.Tuple2;
import scala.Tuple3;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class TestJSONGeneratorWithProvenance extends SharedJavaSparkContext implements Serializable {
/**
*
*/
private static final long serialVersionUID = -7981683598336701496L;
protected URL getTestResource(String name)
{
return getClass().getClassLoader().getResource(name);
}
@Test
public void testWithoutProvenance() throws IOException, ParseException {
JavaRDD<String> karmaRDD = KarmaDriver.applyModel(jsc(), getInputRDD(),
getKarmaSettings(false).toString(), 1000, 2);
JavaRDD<String> result = JSONReducerDriver.reduceJSON(jsc(), karmaRDD, 1,
getKarmaSettings(false).toString());
List<String> lines = result.collect();
JSONParser jsonParser = new JSONParser();
for(String line: lines) {
String[] keyValue = line.split("\t");
if(keyValue[0].equals("http://effect.isi.edu/data/forum/100/topic/C73751DBA59047F08EE1664859163237AE9F7780")) {
JSONObject jsonObj = (JSONObject)jsonParser.parse(keyValue[1]);
String source = jsonObj.get("source").toString();
// System.out.println(source);
// System.out.println(line);
assertEquals(source, "[\"asu-hacking-post_76b7327d0bd1846e3e153530f9be7722\",\"asu-hacking-post_89ecd75389778f4c37ddf3f71e42ba63\",\"asu-hacking-post_b22bc37a0e1d5a92b86726d2df067d5b\",\"asu-hacking-post_c0bdcf1482cec7959a5de0f5b47eeedc\"]");
}
}
assertEquals(result.count(), 12);
}
@Test
public void testProvenance() throws IOException, ParseException {
JavaRDD<String> karmaRDD = KarmaDriver.applyModel(jsc(),
getInputRDD(),
getKarmaSettings(true).toString(), 1000, 2);
JavaRDD<String> result = JSONReducerDriver.reduceJSON(jsc(), karmaRDD, 1, getKarmaSettings(true).toString());
List<String> lines = result.collect();
JSONParser jsonParser = new JSONParser();
for(String line: lines) {
String[] keyValue = line.split("\t");
if(keyValue[0].equals("http://effect.isi.edu/data/forum/100/topic/C73751DBA59047F08EE1664859163237AE9F7780")) {
JSONObject jsonObj = (JSONObject)jsonParser.parse(keyValue[1]);
String source = jsonObj.get("source").toString();
// System.out.println(source);
// System.out.println(line);
assertEquals(source, "asu-hacking-post_c0bdcf1482cec7959a5de0f5b47eeedc");
}
}
assertEquals(result.count(), 12);
}
private JavaRDD<String> getInputRDD() {
JavaRDD<String> inputRDD = jsc().textFile(getTestResource("provenance/hacking_posts.jl").toString());
assertEquals(inputRDD.count(), 4);
JavaRDD<String> tabRDD = inputRDD.map(new Function<String, String>() {
@Override
public String call(String arg0) throws Exception {
return "karma\t" + arg0;
}
});
return tabRDD;
}
private JSONObject getKarmaSettings(boolean provenance) {
JSONObject karmaSettings = new JSONObject();
karmaSettings.put("karma.input.type", "JSON");
karmaSettings.put("base.uri", "http://effect.isi.edu/data/");
karmaSettings.put("rdf.generation.root", "http://schema.dig.isi.edu/ontology/Topic1");
karmaSettings.put("model.uri", getTestResource("provenance/hacking_posts-model.ttl").toString());
karmaSettings.put("is.model.in.json", "true");
karmaSettings.put("context.uri", getTestResource("provenance/karma-context.json").toString());
karmaSettings.put("is.root.in.json", "true");
karmaSettings.put("read.karma.config", "false");
karmaSettings.put("rdf.generation.disable.nesting", "true");
karmaSettings.put("karma.reducer.run", "false");
if(provenance)
karmaSettings.put("karma.provenance.properties", "source,publisher,dateRecorded:date");
return karmaSettings;
}
}