package com.narphorium.entity_builder;
import com.github.jsonldjava.core.JsonLdError;
import com.github.jsonldjava.core.JsonLdOptions;
import com.github.jsonldjava.core.JsonLdProcessor;
import com.github.jsonldjava.utils.JsonUtils;
import com.mongodb.hadoop.io.BSONWritable;
import com.mongodb.hadoop.io.MongoUpdateWritable;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.bson.BasicBSONObject;
import org.bson.types.ObjectId;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Reducer that applies a JSON-LD frame to every fragment received for an entity and emits
 * the framed result as a MongoDB update keyed on the entity id.
 *
 * <p>The frame is read once per task in {@link #configure(JobConf)} from the file named by the
 * {@code frame-file} job property. Each incoming value is parsed as JSON, framed with
 * {@link JsonLdProcessor#frame}, and the first node of the resulting {@code @graph} is written
 * as a {@code $set} update against the document whose {@code _id} equals the reduce key.
 */
public class EntityFrameReducer extends MapReduceBase implements Reducer<Text, Text, NullWritable, MongoUpdateWritable> {

    private static final Logger LOG = Logger.getLogger(EntityFrameReducer.class.getName());

    /** Job property holding the path of the JSON-LD frame file. */
    private static final String FRAME_FILE_KEY = "frame-file";

    private final EntityFrame entityFrame = new EntityFrame();
    private final JsonLdOptions options = new JsonLdOptions();

    /**
     * Loads the JSON-LD frame named by the {@code frame-file} job property and sets the
     * framing options.
     *
     * <p>A missing property or an unreadable file is logged and the reducer continues with
     * whatever state {@link EntityFrame} has by default; this keeps the original best-effort
     * behaviour for I/O failures.
     *
     * @param conf the job configuration
     */
    @Override
    public void configure(JobConf conf) {
        options.setExplicit(false);
        options.setCompactArrays(true);
        options.setEmbed(true);

        // The property must be checked BEFORE building the Path: Path's constructor rejects
        // null/empty strings, so a null check on the Path itself can never fire.
        String frameFile = conf.get(FRAME_FILE_KEY);
        if (frameFile == null || frameFile.isEmpty()) {
            LOG.severe("Job property '" + FRAME_FILE_KEY + "' is not set; no frame was loaded");
            return;
        }

        Path framePath = new Path(frameFile);
        try {
            FileSystem fs = FileSystem.get(conf);
            // Close the stream even when parsing fails; a second close by parse() is harmless.
            try (FSDataInputStream in = fs.open(framePath)) {
                entityFrame.parse(in);
            }
        } catch (IOException ex) {
            LOG.log(Level.SEVERE, "Unable to read frame file " + framePath, ex);
        }
    }

    /**
     * Frames every fragment of one entity and emits one MongoDB update per fragment.
     *
     * <p>Fragments that fail JSON-LD framing, or whose framed result contains no usable
     * {@code @graph} node, are logged and skipped so that one bad fragment does not abort
     * the whole reduce task.
     *
     * @param entityId       reduce key; used as the {@code _id} of the target document
     * @param fragments      JSON(-LD) documents, one per value
     * @param framedEntities collector receiving the update operations
     * @param reporter       unused
     * @throws IOException if a fragment is not parseable JSON or the collector fails
     */
    @Override
    public void reduce(Text entityId, Iterator<Text> fragments,
            OutputCollector<NullWritable, MongoUpdateWritable> framedEntities, Reporter reporter) throws IOException {
        String id = entityId.toString();
        while (fragments.hasNext()) {
            String fragment = fragments.next().toString();
            // frame() accepts any parsed JSON value, so no cast to Map is needed here.
            Object jsonData = JsonUtils.fromString(fragment);
            try {
                Map<String, Object> framed = JsonLdProcessor.frame(jsonData, entityFrame.getFrame(), options);
                Map<String, Object> entity = firstGraphNode(framed);
                if (entity == null) {
                    LOG.warning("Framing produced no entity for " + id + "; fragment skipped");
                    continue;
                }

                BasicBSONObject bsonQuery = new BasicBSONObject("_id", id);
                BasicBSONObject bsonObject = new BasicBSONObject();
                bsonObject.putAll(entity);
                BasicBSONObject bsonUpdate = new BasicBSONObject();
                bsonUpdate.put("$set", bsonObject);
                // Boolean flags are (upsert, multiUpdate) per MongoUpdateWritable's constructor.
                framedEntities.collect(null, new MongoUpdateWritable(bsonQuery, bsonUpdate, true, false));
            } catch (JsonLdError e) {
                LOG.log(Level.WARNING, "JSON-LD framing failed for " + id + "; fragment skipped", e);
            }
        }
    }

    /**
     * Returns the first node of the {@code @graph} array of a framed document, or
     * {@code null} when the document has no such array, the array is empty, or its first
     * element is not a JSON object. Any further nodes in the array are ignored.
     */
    private static Map<String, Object> firstGraphNode(Map<String, Object> framed) {
        if (framed == null) {
            return null;
        }
        Object graph = framed.get("@graph");
        if (!(graph instanceof List) || ((List<?>) graph).isEmpty()) {
            return null;
        }
        Object first = ((List<?>) graph).get(0);
        if (!(first instanceof Map)) {
            return null;
        }
        // Parsed JSON objects are maps with string keys.
        @SuppressWarnings("unchecked")
        Map<String, Object> node = (Map<String, Object>) first;
        return node;
    }
}