package com.narphorium.entity_builder;
import com.github.jsonldjava.utils.JsonUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.logging.Logger;
public class EntityReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
private EntityFrame entityFrame = new EntityFrame();
private int rank;
@Override
public void configure(JobConf conf) {
rank = conf.getInt("rank", 0);
Path framePath = new Path(conf.get("frame-file"));
try {
FileSystem fs = FileSystem.get(conf);
if (framePath != null) {
entityFrame.parse(fs.open(framePath));
}
} catch (IOException ex) {
Logger.getGlobal().severe(ex.toString());
}
}
/*
* (non-Javadoc)
*
* @see org.apache.hadoop.mapred.Reducer#reduce(java.lang.Object, java.util.Iterator,
* org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
*/
public void reduce(Text entityId, Iterator<Text> fragments, OutputCollector<Text, Text> entities,
Reporter reporter) throws IOException {
Map<String, Object> object = new TreeMap<String, Object>();
// Merge object fragments into a single object
while (fragments.hasNext()) {
String triple = fragments.next().toString();
Map<String, Object> tripleData = (Map<String, Object>) JsonUtils.fromString(triple);
for (Map.Entry<String, Object> entry2 : tripleData.entrySet()) {
if (entry2.getKey().equals("@id")) {
object.put(entry2.getKey(), entry2.getValue());
} else if (object.containsKey(entry2.getKey())) {
((List<Object>) object.get(entry2.getKey())).addAll((List<Object>) entry2.getValue());
} else {
object.put(entry2.getKey(), entry2.getValue());
}
}
}
// Pivot object
boolean pivoted = false;
for (String pivotProperty : entityFrame.getPivots(rank)) {
String reversedProperty = "!" + pivotProperty;
Map<String, Object> cleanObject = new TreeMap<String, Object>(object);
cleanObject.remove(reversedProperty);
if (object.containsKey(reversedProperty)) {
for (Object pivotNode : (List<Object>) object.get(reversedProperty)) {
Map<String, Object> pivotedObject = new TreeMap<String, Object>();
List<Object> objects = new ArrayList<Object>();
objects.add(cleanObject);
pivotedObject.put(pivotProperty, objects);
if (pivotNode instanceof String) {
entities.collect(new Text((String) pivotNode),
new Text(JsonUtils.toString(pivotedObject)));
} else if (pivotNode instanceof Map) {
String key = (String) ((Map<String, Object>) pivotNode).get("@id");
entities.collect(new Text(key), new Text(JsonUtils.toString(pivotedObject)));
}
pivoted = true;
}
}
}
if (!pivoted) {
entities.collect(entityId, new Text(JsonUtils.toString(object)));
}
}
}