package org.archive.hadoop.func;

import java.io.IOException;
import java.util.ArrayList;
import java.util.logging.Logger;

import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.archive.format.json.JSONUtils;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * Pig UDF that "flattens" selected paths of a JSON document into a single
 * tuple of extracted String values.
 */
public class JSONViewEvalFunc extends EvalFunc<Tuple> {
    private static final Logger LOG =
        Logger.getLogger(JSONViewEvalFunc.class.getName());

    protected TupleFactory mTupleFactory = TupleFactory.getInstance();
    private ArrayList<Object> mProtoTuple = null;

    public JSONViewEvalFunc() {
        mProtoTuple = new ArrayList<Object>();
    }

    @Override
    public Tuple exec(Tuple tup) throws IOException {
        // [0] is the JSON. Remaining elements are Strings describing paths
        // into the JSON to "flatten" into a single tuple:
        if (tup == null || tup.size() == 0) {
            return null;
        }
        try {
            JSONObject json = new JSONObject(tup.get(0).toString());
            for (int i = 1; i < tup.size(); i++) {
                String path = tup.get(i).toString();
                String result = JSONUtils.extractSingle(json, path);
                mProtoTuple.add(result);
            }
        } catch (JSONException e) {
            LOG.warning("Failed to parse JSON: " + e.getMessage());
            // Discard any partially accumulated values so they cannot leak
            // into the result of the next exec() call:
            mProtoTuple.clear();
            return null;
        }
        // newTuple(List) copies the list contents, so the shared scratch
        // list can be safely cleared for reuse on the next call:
        Tuple t = mTupleFactory.newTuple(mProtoTuple);
        mProtoTuple.clear();
        return t;
    }
}
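
// A usage sketch, not taken from this repository: the jar name, relation
// names, and JSON path below are illustrative assumptions. In Pig Latin the
// UDF might be invoked along these lines:
//
//   REGISTER archive-hadoop.jar;
//   records = LOAD 'crawl.json' AS (json:chararray);
//   views = FOREACH records GENERATE
//       org.archive.hadoop.func.JSONViewEvalFunc(
//           json, 'Envelope.WARC-Header-Metadata.WARC-Target-URI');
//
// The first argument is the raw JSON string; each remaining argument is a
// path handed to JSONUtils.extractSingle, and the output is one flat tuple
// holding the extracted values (null for paths that fail to resolve).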