/* Copyright 2013-2015 Fabian Steeg, Pascal Christoph, hbz. Licensed under the Eclipse Public License 1.0 */
package org.lobid.lodmill;
import java.io.IOException;
import java.util.HashMap;
import org.culturegraph.mf.framework.DefaultObjectPipe;
import org.culturegraph.mf.framework.ObjectReceiver;
import org.culturegraph.mf.framework.annotations.In;
import org.culturegraph.mf.framework.annotations.Out;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.jsonldjava.core.JsonLdError;
import com.github.jsonldjava.core.JsonLdOptions;
import com.github.jsonldjava.core.JsonLdProcessor;
import com.github.jsonldjava.jena.JenaRDFParser;
import com.github.jsonldjava.utils.JSONUtils;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
/**
 * Converts a jena model to JSON-LD document(s) consumable by elasticsearch.
 * Every node in the graph will be a document on its own, except when this is
 * declared otherwise (-> keep node).
 * <p>
 * For every incoming model the non-blank subjects are visited once. Subjects
 * that are neither "keep" nodes nor the main node are serialized as documents
 * of their own and their statements are removed from the main model; what is
 * left is emitted last as the main document, optionally enriched with the
 * statements of an "about" node.
 *
 * @author Fabian Steeg (fsteeg)
 * @author Pascal Christoph (dr0i)
 */
@In(Model.class)
@Out(HashMap.class)
public final class RdfModel2ElasticsearchJsonLd
    extends DefaultObjectPipe<Model, ObjectReceiver<HashMap<String, String>>> {
  private static final Logger LOG =
      LoggerFactory.getLogger(RdfModel2ElasticsearchJsonLd.class);
  // the items will have their own index type and ES parents
  private static final String PROPERTY_TO_PARENT =
      "http://purl.org/vocab/frbr/core#exemplarOf";
  private static final String LOBID_DOMAIN = "http://lobid.org/";
  private static final String LOBID_ITEM_URI_PREFIX = LOBID_DOMAIN + "item/";
  // the sub node we want to cling to the main node
  private static final String KEEP_NODE_PREFIX = "http://d-nb.info/gnd";
  private static final String KEEP_NODE_MAIN_PREFIX =
      LOBID_DOMAIN + "resource/";
  private static final String TYPE_ITEM = "json-ld-lobid-item";
  private static final String TYPE_RESOURCE = "json-ld-lobid";
  private static final JenaRDFParser parser = new JenaRDFParser();
  // created once and reused instead of once per item document
  private static final ObjectMapper MAPPER = new ObjectMapper();

  /**
   * Splits the given model into its documents and pushes each of them to the
   * receiver.
   *
   * @param originModel the model to convert
   */
  @Override
  public void process(final Model originModel) {
    splitModel2ItemAndResourceModel(originModel);
  }

  /**
   * Emits one document per extractable subject and finally the main document
   * made of all statements that were not extracted.
   *
   * @param originalModel the model to split
   */
  private void splitModel2ItemAndResourceModel(final Model originalModel) {
    // NOTE(review): createModelForGraph wraps the graph of originalModel, it
    // does not copy it - so the removals below presumably also show up in
    // originalModel. Confirm that callers do not reuse the model afterwards.
    final Model mainModel =
        ModelFactory.createModelForGraph(originalModel.getGraph());
    final ResIterator subjects = originalModel.listSubjects();
    // Per-call state: the id found for one model must never leak into the
    // next model.
    String mainNodeId = null;
    String aboutJson = "";
    // iterate through all nodes
    while (subjects.hasNext()) {
      final Resource subject = subjects.next();
      if (subject.isAnon()) {
        // blank nodes stay in the main model
        continue;
      }
      final String uri = subject.getURI();
      final Model submodel = ModelFactory.createDefaultModel();
      if (uri.endsWith("about")) {
        // The result is deliberately ignored: whatever statements were
        // collected make up the "about" metadata.
        shouldSubmodelBeExtracted(submodel, subject);
        aboutJson = toAboutFragment(submodel, uri, aboutJson);
      } else if (!uri.startsWith(KEEP_NODE_PREFIX)
          && !uri.startsWith(KEEP_NODE_MAIN_PREFIX)) {
        // just extract sub nodes we don't want to keep in the main model
        if (shouldSubmodelBeExtracted(submodel, subject)) {
          toJson(submodel, uri, "");
        }
      } else if (uri.startsWith(LOBID_DOMAIN)) {
        mainNodeId = uri;
      }
      // NOTE(review): when extraction was declined the sub model may still
      // hold the statements seen before the "keep" statement; these are
      // removed from the main model here as well - confirm this is intended.
      if (!submodel.isEmpty()) {
        // remove the newly created sub model from the main node
        mainModel.remove(submodel);
      }
    }
    // the main node (with its kept sub node) and an optional "about" metadata
    toJson(mainModel, mainNodeId, aboutJson);
  }

  /**
   * Serializes the statements of the "about" node as a fragment that can be
   * appended to the members of the main document: a leading comma followed by
   * the members of the single expanded JSON-LD object.
   *
   * @param aboutModel the statements of the "about" node
   * @param uri the URI of the "about" node, used for log messages only
   * @param fallback the value to return if no fragment can be created
   * @return the fragment, or {@code fallback} if serialization failed or
   *         yielded no members
   */
  private static String toAboutFragment(final Model aboutModel,
      final String uri, final String fallback) {
    try {
      final Object json =
          JsonLdProcessor.fromRDF(aboutModel, new JsonLdOptions(), parser);
      final String expanded =
          JSONUtils.toString(JsonLdProcessor.expand(json));
      // "[]" would make substring() throw and "[{}]" would leave a dangling
      // comma in the final document, so both are skipped.
      if (expanded.length() <= 4) {
        LOG.warn("No 'about' metadata could be extracted from {}", uri);
        return fallback;
      }
      // strip the enclosing "[{" and "}]" (assumes the compact serialization
      // of a one-element array)
      return "," + expanded.substring(2, expanded.length() - 2);
    } catch (JsonLdError e) {
      LOG.error("Could not convert 'about' node " + uri + " to JSON-LD", e);
      return fallback;
    }
  }

  /**
   * Copies the statements of the given subject into the sub model until a
   * statement is found whose object starts with {@link #KEEP_NODE_PREFIX}.
   * <p>
   * A sub model mustn't be extracted if the resource is to be kept as a sub
   * node of the main node. A bnode mustn't be extracted either.
   *
   * @param submodel the model the statements are added to; it may be
   *        partially filled when {@code false} is returned
   * @param subjectResource the subject whose statements are inspected
   * @return {@code false} if a statement pointing to a "keep" node was found,
   *         {@code true} if all statements were copied
   */
  private static boolean shouldSubmodelBeExtracted(Model submodel,
      Resource subjectResource) {
    final StmtIterator statements = subjectResource.listProperties();
    while (statements.hasNext()) {
      final Statement statement = statements.nextStatement();
      // identifying the main node
      if (statement.getObject().toString().startsWith(KEEP_NODE_PREFIX)) {
        return false;
      }
      submodel.add(statement);
    }
    return true;
  }

  /**
   * Creates and pushes one map holding the json document itself together with
   * its index properties. The 'expanded' JSON-LD serialization is used to
   * guarantee consistent field types. Nothing is pushed for an empty model or
   * a missing id.
   *
   * @param model the statements making up the document
   * @param id the id of the document, {@code null} if none could be determined
   * @param aboutJson a fragment to append to the document, may be empty
   */
  private void toJson(Model model, String id, String aboutJson) {
    if (model.isEmpty()) {
      return;
    }
    if (id == null) {
      LOG.warn("Model has no main node below {}, skipping it", LOBID_DOMAIN);
      return;
    }
    try {
      final Object json =
          JsonLdProcessor.fromRDF(model, new JsonLdOptions(), parser);
      // the json document itself
      final Object expanded = JsonLdProcessor.expand(json);
      getReceiver().process(addInternalProperties(new HashMap<String, String>(),
          id, JSONUtils.toString(expanded), aboutJson));
    } catch (JsonLdError e) {
      LOG.error("Could not convert " + id + " to JSON-LD", e);
    }
  }

  /**
   * Wraps the json into the document to be indexed and fills the map with
   * that document and its index properties (graph, type, id and, for items,
   * parent).
   *
   * @param jsonMap the map to fill
   * @param id the id of the document
   * @param json the expanded JSON-LD serialization
   * @param aboutJson a fragment to append to the document, may be empty
   * @return the given map
   */
  private static HashMap<String, String> addInternalProperties(
      HashMap<String, String> jsonMap, String id, String json,
      String aboutJson) {
    String internalParent = "";
    String type = TYPE_RESOURCE;
    if (id.startsWith(LOBID_ITEM_URI_PREFIX)) {
      type = TYPE_ITEM;
      try {
        final JsonNode node = MAPPER.readValue(json, JsonNode.class);
        final JsonNode parent = node.findValue(PROPERTY_TO_PARENT);
        // a parent without an "@id" is treated like a missing parent
        final JsonNode parentIdNode =
            parent != null ? parent.findValue("@id") : null;
        final String parentId =
            parentIdNode != null ? parentIdNode.asText() : null;
        // NOTE(review): without a parent this writes the literal "null" into
        // the document while the map gets "no_parent" - kept as is, confirm
        // which value consumers expect.
        internalParent = ",\"_parent\":\"" + parentId + "\"";
        if (parentId == null) {
          LOG.warn("Item URI " + id + " has no parent declared!");
          jsonMap.put(ElasticsearchIndexer.Properties.PARENT.getName(),
              "no_parent");
        } else {
          jsonMap.put(ElasticsearchIndexer.Properties.PARENT.getName(),
              parentId);
        }
      } catch (IOException e) {
        LOG.error("Could not read the JSON of item " + id, e);
      }
    }
    // wrap json into a "@graph" for elasticsearch (still valid JSON-LD)
    final String jsonDocument = "{\"@graph\":" + json + ",\"internal_id\":\""
        + id + "\"" + internalParent + aboutJson + "}";
    jsonMap.put(ElasticsearchIndexer.Properties.GRAPH.getName(), jsonDocument);
    jsonMap.put(ElasticsearchIndexer.Properties.TYPE.getName(), type);
    jsonMap.put(ElasticsearchIndexer.Properties.ID.getName(), id);
    return jsonMap;
  }
}