package edu.isi.karma.spark; import java.io.IOException; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.PairFunction; import org.json.JSONObject; import org.json.XML; import scala.Tuple2; public class UtilitiesDriver { public static JavaPairRDD<String, String> XMLToJSON(JavaSparkContext jsc, JavaPairRDD<String, String> input) throws IOException { return input.mapToPair(new PairFunction<Tuple2<String,String>, String, String>() { private static final long serialVersionUID = 2878941073410454935L; @Override public Tuple2<String, String> call(Tuple2<String, String> t) throws Exception { String key = t._1(); JSONObject value = new JSONObject(t._2()); String raw = value.getString("_rawContent"); JSONObject json = getJsonFromXml(raw); value.put("_jsonRep", json); return new Tuple2<String, String>(key, value.toString()); } }); } public static JavaRDD<String> XMLToJSON(JavaSparkContext jsc, JavaRDD<String> input) throws IOException { JavaPairRDD<String, String> inputPair = input.mapToPair(new PairFunction<String, String, String>() { private static final long serialVersionUID = -4153068088292891034L; public Tuple2<String, String> call(String s) throws Exception { int tabIndex = s.indexOf("\t"); return new Tuple2<>(s.substring(0, tabIndex), s.substring(tabIndex + 1)); } }); JavaPairRDD<String, String> pairs = XMLToJSON(jsc, inputPair); return pairs.map(new Function<Tuple2<String, String>, String>() { private static final long serialVersionUID = 5833358013516510838L; @Override public String call(Tuple2<String, String> arg0) throws Exception { return (arg0._1() + "\t" + arg0._2()); } }); } public static org.json.JSONObject getJsonFromXml(String xmlStr) { return XML.toJSONObject(xmlStr); } /* * method to convert xml to json */ public static String getJsonFromXml(String xmlStr, boolean prettyOutput) { org.json.JSONObject xmlJSONObj = XML.toJSONObject(xmlStr); String jsonStr = ""; if(prettyOutput) jsonStr = xmlJSONObj.toString(4); else jsonStr = xmlJSONObj.toString(); return jsonStr; } }