package edu.isi.karma.mapreduce.function; import java.util.LinkedList; import java.util.List; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.io.Text; import org.json.JSONArray; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class CollectJSONObject extends UDF { private static Logger LOG = LoggerFactory.getLogger(MergeJSON.class); public Text evaluate(Text target, Text path) { try { if (path == null) return new Text(""); String targetString = target.toString(); String pathString = path.toString().replaceAll("\\[.*\\]",""); JSONObject obj = new JSONObject(targetString); JSONArray array = new JSONArray(); for (String aPath : pathString.split(",")) { String[] levels = splitPath(aPath); collectJSONObject(obj, levels, 1, array); } return new Text(array.toString()); }catch(Exception e) { LOG.error("something wrong",e ); return new Text(""); } } protected static String[] splitPath(String aPath) { List<String> levelsCandidates = new LinkedList<>(); int lastSplit = 0; for(int i = 0; i < aPath.length() - 1; i++) { if(aPath.charAt(i) != '\\' && aPath.charAt(i+1) == '.') { levelsCandidates.add(aPath.substring(lastSplit, aPath.charAt(i) != '\\'? i+1 : i).replace("\\", "")); lastSplit = i+2; } } if(lastSplit < aPath.length()) { levelsCandidates.add(aPath.substring(lastSplit, aPath.length()).replace("\\", "")); } String[] levels = new String[levelsCandidates.size()]; levels = levelsCandidates.toArray(levels); return levels; } public void collectJSONObject(JSONObject obj, String[] levels, int i, JSONArray array) { if (i < levels.length && obj.has(levels[i])) { if (i == levels.length - 1) { Object value = obj.get(levels[i]); if (value instanceof JSONArray) { JSONArray t = (JSONArray)value; for (int j = 0; j < t.length(); j++) { array.put(t.get(j)); } } else { array.put(value); } } else { Object value = obj.get(levels[i]); if (value instanceof JSONArray) { JSONArray t = (JSONArray)value; for (int j = 0; j < t.length(); j++) { try { JSONObject o = t.getJSONObject(j); collectJSONObject(o, levels, i + 1, array); }catch (Exception e) { } } } else if (value instanceof JSONObject){ collectJSONObject((JSONObject)value, levels, i + 1, array); } } } } }