package edu.isi.karma.mapreduce.function;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.json.JSONArray;
import org.json.JSONObject;
import org.json.JSONTokener;
import edu.isi.karma.rdf.CommandLineArgumentParser;
/**
 * Command-line tool that renames JSON-LD style keys in the JSON files found under a directory
 * of the configured Hadoop file system.
 *
 * <p>Every non-empty file found recursively under the {@code filepath} option is read as a JSON
 * array of objects. In each object, at any nesting depth, the key {@code "@id"} is renamed to
 * {@code "uri"} and the key {@code "@type"} is renamed to {@code "a"}. The rewritten document is
 * first written to a temporary file and then moved over the original file.
 */
public class UpdateIdAndType {
    /** Value of the {@code filepath} option: the directory that is scanned recursively. */
    static String filePath = null;

    /**
     * Directory in which the temporary rewritten copies are created. Defaults to
     * {@link #filePath} when the {@code outputpath} option is absent.
     */
    static String outputPath = null;

    /**
     * Rewrites every non-empty file under the {@code filepath} directory in place.
     *
     * @param args command-line arguments; see {@link #createCommandLineOptions()}
     * @throws IOException if a file cannot be read, written, deleted or renamed
     * @throws IllegalArgumentException if the {@code filepath} option is missing
     */
    public static void main(String[] args) throws IOException {
        Options options = createCommandLineOptions();
        CommandLine cl = CommandLineArgumentParser.parse(args, options, UpdateIdAndType.class.getSimpleName());
        // NOTE(review): a null result presumably means help was requested or parsing failed.
        if (cl == null) {
            return;
        }

        filePath = cl.getOptionValue("--filepath");
        if (filePath == null) {
            // Fail with a readable message instead of the "null string" error from new Path(null).
            throw new IllegalArgumentException("Missing required option: filepath");
        }
        outputPath = cl.hasOption("--outputpath") ? cl.getOptionValue("--outputpath") : filePath;

        FileSystem hdfs = FileSystem.get(new Configuration());
        RemoteIterator<LocatedFileStatus> itr = hdfs.listFiles(new Path(filePath), true);
        while (itr.hasNext()) {
            LocatedFileStatus status = itr.next();
            if (status.getLen() <= 0) {
                // Zero-length files hold no JSON document; leave them untouched.
                continue;
            }

            Path sourceFile = status.getPath();
            // Hadoop paths always use '/', independent of the local OS separator.
            // NOTE(review): an existing file named "<name>.json" in outputPath would be
            // overwritten by this temporary file - confirm inputs never collide.
            Path tempFile = new Path(outputPath + Path.SEPARATOR + sourceFile.getName() + ".json");

            // The callee closes both streams, also when it fails.
            createSequenceFileFromJSON(hdfs.open(sourceFile), hdfs.create(tempFile));

            // Only replace the original once the rewritten copy was written completely, and
            // do not ignore the boolean failure results of delete/rename.
            if (!hdfs.delete(sourceFile, false)) {
                throw new IOException("Could not delete " + sourceFile + "; rewritten copy left at " + tempFile);
            }
            if (!hdfs.rename(tempFile, sourceFile)) {
                throw new IOException("Could not move " + tempFile + " to " + sourceFile);
            }
        }
    }

    /**
     * Copies a JSON array of objects from the input stream to the output stream, renaming the
     * {@code "@id"} and {@code "@type"} keys of every object on the way (see
     * {@link #processJSONObject(JSONObject)}).
     *
     * <p>Despite its name this method does not produce a Hadoop sequence file: the output is a
     * UTF-8 JSON array in which each element is pretty-printed with an indent of 4. Elements are
     * processed one at a time. Both streams are closed before the method returns, whether it
     * succeeds or fails.
     *
     * @param fsDataInputStream stream positioned at the start of a UTF-8 JSON array of objects
     * @param fsDataOutputStream stream that receives the rewritten array
     * @throws IOException if reading or writing fails
     * @throws org.json.JSONException if the input is not a well-formed JSON array
     * @throws ClassCastException if an array element is not a JSON object
     */
    public static void createSequenceFileFromJSON(FSDataInputStream fsDataInputStream, FSDataOutputStream fsDataOutputStream) throws IOException {
        // A plain Writer is used instead of PrintWriter so that write and close failures
        // surface as IOException; the caller deletes the source file after this returns.
        try (InputStreamReader reader = new InputStreamReader(fsDataInputStream, StandardCharsets.UTF_8);
             BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))) {
            JSONTokener tokener = new JSONTokener(reader);
            if (tokener.nextClean() != '[') {
                throw tokener.syntaxError("Expected '[' at the start of the document");
            }
            writeLine(writer, "[");

            char next = tokener.nextClean();
            if (next != ']') {
                // Not an empty array: un-read the first character of the first element.
                tokener.back();
                do {
                    JSONObject obj = (JSONObject) tokener.nextValue();
                    processJSONObject(obj);
                    writeLine(writer, obj.toString(4));

                    next = tokener.nextClean();
                    if (next == ',') {
                        writeLine(writer, ",");
                    } else if (next != ']') {
                        throw tokener.syntaxError("Expected ',' or ']' after an array element");
                    }
                } while (next != ']');
            }
            writeLine(writer, "]");
        }
    }

    /** Writes {@code text} followed by the platform line separator. */
    private static void writeLine(BufferedWriter writer, String text) throws IOException {
        writer.write(text);
        writer.newLine();
    }

    /**
     * Renames {@code "@id"} to {@code "uri"} and {@code "@type"} to {@code "a"} in the given
     * object and, recursively, in every object reachable through its values.
     *
     * <p>The object is modified in place. An existing {@code "uri"} or {@code "a"} entry is
     * overwritten when the corresponding {@code "@"} key is present.
     *
     * @param obj the object to update
     */
    @SuppressWarnings("unchecked")
    private static void processJSONObject(JSONObject obj) {
        if (obj.has("@id")) {
            obj.put("uri", obj.getString("@id"));
            obj.remove("@id");
        }
        if (obj.has("@type")) {
            obj.put("a", obj.get("@type"));
            obj.remove("@type");
        }
        // All renames of this object are done before iterating, so its key set is not
        // modified while the iterator is in use.
        for (Iterator<String> keysIterator = obj.keys(); keysIterator.hasNext(); ) {
            processValue(obj.get(keysIterator.next()));
        }
    }

    /**
     * Applies {@link #processJSONObject(JSONObject)} to a value if it is an object, or to every
     * element if it is an array (including arrays nested inside arrays). Other values are ignored.
     */
    private static void processValue(Object value) {
        if (value instanceof JSONObject) {
            processJSONObject((JSONObject) value);
        } else if (value instanceof JSONArray) {
            JSONArray array = (JSONArray) value;
            for (int i = 0; i < array.length(); i++) {
                processValue(array.get(i));
            }
        }
    }

    /**
     * Builds the command-line options understood by {@link #main(String[])}: {@code filepath}
     * and {@code outputpath} (both take a value) and the {@code help} flag.
     *
     * @return the option definitions
     */
    private static Options createCommandLineOptions() {
        Options options = new Options();
        options.addOption(new Option("filepath", "filepath", true, "location of the input file directory"));
        options.addOption(new Option("outputpath", "outputpath", true, "location of output file directory"));
        options.addOption(new Option("help", "help", false, "print this message"));
        return options;
    }
}