package com.narphorium.entity_builder;
import com.fasterxml.jackson.core.JsonGenerationException;
import com.github.jsonldjava.utils.JsonUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
public class TripleMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
private static final String RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
private NTriplesParser tripleParser = new NTriplesParser();
private EntityFrame entityFrame = new EntityFrame();
@Override
public void configure(JobConf conf) {
Path framePath = new Path(conf.get("frame-file"));
try {
FileSystem fs = FileSystem.get(conf);
if (framePath != null) {
entityFrame.parse(fs.open(framePath));
}
} catch (IOException ex) {
Logger.getGlobal().severe(ex.toString());
}
}
/*
* (non-Javadoc)
*
* @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object,
* org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
*/
public void map(LongWritable lineNumber, Text rawTriple, OutputCollector<Text, Text> triples,
Reporter reporter) throws IOException {
//System.out.println("Triple Test");
RdfTriple triple = tripleParser.parse(rawTriple.toString());
// Only map triples that are needed for the frame
if (entityFrame.getMappedPredicates().contains(triple.getPredicate())) {
if (triple.getPredicate().equals(RDF_TYPE)) {
triple = new RdfTriple(triple.getSubject(), "@type", triple.getObject());
}
triples.collect(new Text(triple.getSubject()), new Text(tripleAsJsonLd(triple)));
}
// Map reversed triples if they're needed to join nested entities together.
String reversePredicate = "!" + triple.getPredicate();
if (entityFrame.getMappedPredicates().contains(reversePredicate)) {
RdfTriple reversedTriple =
new RdfTriple(triple.getObject(), reversePredicate, triple.getSubject());
triples.collect(new Text(triple.getObject()), new Text(tripleAsJsonLd(reversedTriple)));
}
}
private String tripleAsJsonLd(RdfTriple triple) throws JsonGenerationException, IOException {
Map<String, Object> jsonValue = new HashMap<String, Object>();
jsonValue.put("@id", triple.getSubject());
List<Object> objectValues = new ArrayList<Object>();
if (triple.isLiteral() || triple.getPredicate().equals("@type")) {
objectValues.add(triple.getObject());
} else {
Map<String, Object> valueObject = new HashMap<String, Object>();
valueObject.put("@id", triple.getObject());
objectValues.add(valueObject);
}
jsonValue.put(triple.getPredicate(), objectValues);
return JsonUtils.toString(jsonValue);
}
}