package edu.isi.karma.mapreduce.function;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.json.JSONArray;
import org.json.JSONObject;

/**
 * Variant of {@link CreateSequenceFile} that partitions input JSON records into
 * one sequence file per "@type" value (one {@code <localName>.seq} file under
 * {@code outputPath}) instead of writing a single combined output.
 */
public class CreateSequenceFilesByType extends CreateSequenceFile {

	public static void main(String[] args) throws IOException {
		CreateSequenceFilesByType csf = new CreateSequenceFilesByType();
		csf.setup(args);
		csf.execute();
	}

	/**
	 * Processor that routes each JSON object to a writer keyed by the object's
	 * "@type" (first element if "@type" is an array).
	 */
	protected class JSONFileProcessorByType extends JSONFileProcessor {

		public JSONFileProcessorByType(InputStream stream, String fileName) {
			super(stream, fileName);
		}

		/**
		 * Returns the sequence-file writer for {@code obj}'s "@type",
		 * creating and caching it in {@code writers} on first use.
		 *
		 * @param obj record whose "@type" selects the output file; records with
		 *            no "@type" (or an empty "@type" array) go to the "" bucket
		 * @throws IOException if the sequence file cannot be created
		 */
		@Override
		public SequenceFile.Writer getWriter(JSONObject obj) throws IOException {
			String type = "";
			// opt() instead of get(): a record missing "@type" falls into the
			// "" bucket rather than aborting the whole run with a JSONException.
			Object rawType = obj.opt("@type");
			if (rawType instanceof String) {
				type = (String) rawType;
			} else if (rawType instanceof JSONArray) {
				JSONArray types = (JSONArray) rawType;
				if (types.length() > 0) {
					// First listed type wins, as in the original behavior.
					type = types.getString(0);
				}
			}

			// Check-and-create must be atomic: the original unsynchronized
			// containsKey followed by a synchronized put let two threads both
			// see the key absent and both create (and leak) a writer for the
			// same type. NOTE(review): this serializes lookups on `writers`;
			// acceptable unless profiling shows contention.
			synchronized (writers) {
				SequenceFile.Writer writer = writers.get(type);
				if (writer == null) {
					// Use only the last path segment of the type URI as the file
					// name. The +1 skips the '/' itself (the original's
					// Math.max(0, lastIndexOf('/')) kept the slash, producing
					// names like "/Person.seq"); lastIndexOf returns -1 when
					// there is no '/', so +1 yields the whole string.
					String localName = type.substring(type.lastIndexOf('/') + 1);
					// Renamed from `outputPath` — the original local shadowed
					// the inherited outputPath field read on the same line.
					Path typeOutputPath = new Path(outputPath + File.separator + localName + ".seq");
					writer = createSequenceFile(typeOutputPath);
					writers.put(type, writer);
				}
				return writer;
			}
		}
	}

	@Override
	protected JSONFileProcessor getNewJSONProcessor(FileSystem hdfs, LocatedFileStatus status, String fileName)
			throws IOException {
		return new JSONFileProcessorByType(hdfs.open(status.getPath()), fileName);
	}
}