package org.archive.hadoop;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.archive.format.json.JSONView;
import org.json.JSONException;
import org.json.JSONObject;
/**
 * Loader that takes each tuple produced by {@link ArchiveMetadataLoader},
 * parses one of its fields as JSON, and applies a {@link JSONView} to it.
 * Every match returned by the view becomes one output tuple, so a single
 * input record may yield zero, one or many tuples.
 *
 * <p>Instances keep per-record state ({@link #cached}) between calls to
 * {@link #getNext()}.
 */
public class ArchiveJSONViewLoader extends ArchiveMetadataLoader {
    private final static Logger LOG =
        Logger.getLogger(ArchiveJSONViewLoader.class.getName());

    /**
     * Position, within the tuple returned by the parent loader, of the field
     * that is parsed as JSON.
     * NOTE(review): the previous comment claimed the JSON was at [0] while the
     * code has always read [2]; the code's behaviour is kept -- confirm
     * against ArchiveMetadataLoader's tuple layout.
     */
    private static final int JSON_FIELD_INDEX = 2;

    protected TupleFactory mCacheTupleFactory = TupleFactory.getInstance();

    /** Scratch list reused to build each output tuple. */
    private ArrayList<Object> mCacheProtoTuple = null;

    /** View applied to every parsed JSON document. */
    private JSONView view;

    /**
     * Tuples produced from the most recent input record that have not been
     * returned yet; {@code null} when there is nothing pending.
     */
    ArrayList<Tuple> cached;

    /**
     * Creates a loader whose {@link JSONView} is built from the given
     * arguments.
     *
     * @param fieldArgs arguments handed unchanged to the {@link JSONView}
     *        constructor; at least one is required
     * @throws IllegalArgumentException if no arguments are supplied
     */
    public ArchiveJSONViewLoader(String... fieldArgs) {
        super();
        // TODO: fix this logging...
        Logger.getLogger("org.archive").setLevel(Level.WARNING);
        mCacheProtoTuple = new ArrayList<Object>();
        cached = null;
        // A null varargs array is treated like an empty one instead of
        // surfacing as a NullPointerException.
        if (fieldArgs == null || fieldArgs.length == 0) {
            LOG.info("Constructed with NO foo");
            throw new IllegalArgumentException("No field definition");
        }
        if (LOG.isLoggable(Level.INFO)) {
            LOG.info("ArchiveJSONViewLoader:(" +
                StringUtils.join(fieldArgs, ",") +
                ")");
        }
        view = new JSONView(fieldArgs);
    }

    /**
     * Returns the next flattened tuple.
     *
     * <p>Input records that cannot be parsed, or for which the view yields no
     * matches, are skipped. Returning {@code null} for such a record would be
     * read by the caller as end of input (per Pig's {@code LoadFunc.getNext}
     * contract, assuming the parent is a {@code LoadFunc}) and would silently
     * drop every remaining record.
     *
     * @return the next tuple, or {@code null} once the parent loader has no
     *         more records
     * @throws IOException if the parent loader fails
     */
    @Override
    public Tuple getNext() throws IOException {
        // Keep pulling records until one produces output or input runs out.
        while (cached == null || cached.isEmpty()) {
            Tuple inner = super.getNext();
            if (inner == null) {
                // all done
                cached = null;
                return null;
            }
            cached = applyView(inner);
        }
        Tuple next = cached.remove(0);
        if (cached.isEmpty()) {
            cached = null;
        }
        return next;
    }

    /**
     * Parses the JSON field of {@code inner} and turns every match of the
     * view into a tuple.
     *
     * @param inner tuple from the parent loader; may be {@code null}
     * @return a non-empty list of tuples, or {@code null} if the tuple has no
     *         usable JSON field, the JSON cannot be parsed, or the view
     *         produces no matches
     */
    private ArrayList<Tuple> applyView(Tuple inner) {
        // The JSON field must actually exist before it is read.
        if (inner == null || inner.size() <= JSON_FIELD_INDEX) {
            return null;
        }
        try {
            Object rawJson = inner.get(JSON_FIELD_INDEX);
            if (rawJson == null) {
                return null;
            }
            JSONObject json = new JSONObject(rawJson.toString());
            List<List<String>> matches = view.apply(json);
            if (matches == null || matches.isEmpty()) {
                return null;
            }
            ArrayList<Tuple> results = new ArrayList<Tuple>(matches.size());
            for (List<String> match : matches) {
                // The scratch list is cleared right after the tuple is built,
                // as in the original code; this relies on newTuple(List)
                // copying its argument.
                mCacheProtoTuple.addAll(match);
                Tuple tup = mCacheTupleFactory.newTuple(mCacheProtoTuple);
                mCacheProtoTuple.clear();
                results.add(tup);
            }
            return results;
        } catch (JSONException e) {
            // Pass the exception itself so the stack trace is not lost.
            LOG.log(Level.WARNING, "Failed to parse JSON:" + e.getMessage(), e);
        } catch (ExecException e) {
            LOG.log(Level.WARNING, "ExecException:" + e.getMessage(), e);
        }
        return null;
    }
}