package io.lumify.rdf; import com.hp.hpl.jena.datatypes.RDFDatatype; import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; import com.hp.hpl.jena.datatypes.xsd.XSDDateTime; import com.hp.hpl.jena.rdf.model.*; import io.lumify.core.exception.LumifyException; import io.lumify.core.ingest.graphProperty.GraphPropertyWorkData; import io.lumify.core.ingest.graphProperty.GraphPropertyWorker; import io.lumify.core.ingest.graphProperty.GraphPropertyWorkerPrepareData; import io.lumify.core.model.properties.LumifyProperties; import io.lumify.core.util.LumifyLogger; import io.lumify.core.util.LumifyLoggerFactory; import org.apache.commons.codec.digest.DigestUtils; import org.json.JSONObject; import org.securegraph.*; import org.securegraph.Property; import org.securegraph.property.StreamingPropertyValue; import java.io.*; import java.util.ArrayList; import java.util.List; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; public class RdfGraphPropertyWorker extends GraphPropertyWorker { private static final LumifyLogger LOGGER = LumifyLoggerFactory.getLogger(RdfGraphPropertyWorker.class); public static final String RDF_TYPE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; private String hasEntityIri; private String rdfConceptTypeIri; @Override public void prepare(GraphPropertyWorkerPrepareData workerPrepareData) throws Exception { super.prepare(workerPrepareData); hasEntityIri = getOntologyRepository().getRequiredRelationshipIRIByIntent("artifactHasEntity"); // rdfConceptTypeIri is not required because the concept type could have been set by some other means. rdfConceptTypeIri = getOntologyRepository().getConceptIRIByIntent("rdf"); } @Override public void execute(InputStream in, GraphPropertyWorkData data) throws Exception { importRdf(getGraph(), in, null, data, data.getVisibility(), getAuthorizations()); } @Override public boolean isHandled(Element element, Property property) { if (property == null) { return false; } String mimeType = LumifyProperties.MIME_TYPE.getPropertyValue(element); if (!RdfOntology.MIME_TYPE_TEXT_RDF.equals(mimeType)) { return false; } if (!LumifyProperties.RAW.getPropertyName().equals(property.getName())) { return false; } return true; } public void importRdf(Graph graph, File inputFile, GraphPropertyWorkData data, Visibility visibility, Authorizations authorizations) throws IOException { try (InputStream in = new FileInputStream(inputFile)) { File baseDir = inputFile.getParentFile(); importRdf(graph, in, baseDir, data, visibility, authorizations); } } private void importRdf(Graph graph, InputStream in, File baseDir, GraphPropertyWorkData data, Visibility visibility, Authorizations authorizations) { if (rdfConceptTypeIri != null && data != null) { LumifyProperties.CONCEPT_TYPE.setProperty(data.getElement(), rdfConceptTypeIri, data.createPropertyMetadata(), visibility, getAuthorizations()); } Model model = ModelFactory.createDefaultModel(); model.read(in, null); Results results = new Results(); importRdfModel(results, graph, model, baseDir, data, visibility, authorizations); graph.flush(); LOGGER.debug("pushing vertices from RDF import on to work queue"); for (Vertex vertex : results.getVertices()) { getWorkQueueRepository().pushElement(vertex); for (Property prop : vertex.getProperties()) { getWorkQueueRepository().pushGraphPropertyQueue(vertex, prop); } } LOGGER.debug("pushing edges from RDF import on to work queue"); for (Edge edge : results.getEdges()) { getWorkQueueRepository().pushElement(edge); for (Property prop : edge.getProperties()) { getWorkQueueRepository().pushGraphPropertyQueue(edge, prop); } } } private void importRdfModel(Results results, Graph graph, Model model, File baseDir, GraphPropertyWorkData data, Visibility visibility, Authorizations authorizations) { ResIterator subjects = model.listSubjects(); while (subjects.hasNext()) { Resource subject = subjects.next(); importSubject(results, graph, subject, baseDir, data, visibility, authorizations); } } private void importSubject(Results results, Graph graph, Resource subject, File baseDir, GraphPropertyWorkData data, Visibility visibility, Authorizations authorizations) { LOGGER.info("importSubject: %s", subject.toString()); String graphVertexId = getGraphVertexId(subject); VertexBuilder vertexBuilder = graph.prepareVertex(graphVertexId, visibility); if (data != null) { data.setVisibilityJsonOnElement(vertexBuilder); } StmtIterator statements = subject.listProperties(); while (statements.hasNext()) { Statement statement = statements.next(); RDFNode obj = statement.getObject(); if (obj instanceof Resource) { if (isConceptTypeResource(statement)) { String value = statement.getResource().toString(); Metadata metadata = null; if (data != null) { metadata = data.createPropertyMetadata(); } LumifyProperties.CONCEPT_TYPE.setProperty(vertexBuilder, value, metadata, visibility); } } else if (obj instanceof Literal) { LOGGER.info("set property on %s to %s", subject.toString(), statement.toString()); importLiteral(vertexBuilder, statement, baseDir, data, visibility); } else { throw new LumifyException("Unhandled object type: " + obj.getClass().getName()); } } Vertex v = vertexBuilder.save(authorizations); results.addVertex(v); if (data != null) { String edgeId = data.getElement().getId() + "_hasEntity_" + v.getId(); EdgeBuilder e = graph.prepareEdge(edgeId, (Vertex) data.getElement(), v, hasEntityIri, visibility); data.setVisibilityJsonOnElement(e); results.addEdge(e.save(authorizations)); addVertexToWorkspaceIfNeeded(data, v); } statements = subject.listProperties(); while (statements.hasNext()) { Statement statement = statements.next(); RDFNode obj = statement.getObject(); if (obj instanceof Resource) { if (isConceptTypeResource(statement)) { continue; } importResource(results, graph, v, statement, data, visibility, authorizations); } } } private boolean isConceptTypeResource(Statement statement) { String label = statement.getPredicate().toString(); return label.equals(RDF_TYPE_URI); } private void importLiteral(VertexBuilder v, Statement statement, File baseDir, GraphPropertyWorkData data, Visibility visibility) { String propertyName = statement.getPredicate().toString(); RDFDatatype datatype = statement.getLiteral().getDatatype(); Object literalValue = statement.getLiteral().getValue(); Object value = literalValue; String propertyKey = RdfGraphPropertyWorker.class.getName() + "_" + hashValue(value.toString()); if (datatype == null || XSDDatatype.XSDstring.equals(datatype)) { String valueString = statement.getLiteral().toString(); if (valueString.startsWith("streamingValue:")) { value = convertStreamingValueJsonToValueObject(baseDir, valueString); } } else if (literalValue instanceof XSDDateTime) { XSDDateTime xsdDateTime = (XSDDateTime) literalValue; value = xsdDateTime.asCalendar().getTime(); } else { throw new LumifyException("unsupported XSDDatatype: " + datatype.getURI()); } Metadata metadata = null; if (data != null) { metadata = data.createPropertyMetadata(); } v.addPropertyValue(propertyKey, propertyName, value, metadata, visibility); } private String hashValue(String valueString) { // we need a unique value but it's a bit silly to store a whole md5 hash return DigestUtils.md5Hex(valueString).substring(0, 10); } private Object convertStreamingValueJsonToValueObject(File baseDir, String valueString) { JSONObject streamingValueJson = new JSONObject(valueString.substring("streamingValue:".length())); String fileName = streamingValueJson.getString("fileName"); if (baseDir == null) { throw new LumifyException("Could not import streamingValue. No baseDir specified."); } File file = new File(baseDir, fileName); InputStream in; try { if (!file.exists()) { throw new LumifyException("File " + file.getAbsolutePath() + " does not exist."); } in = new FileInputStream(file); } catch (FileNotFoundException ex) { throw new LumifyException("File " + file.getAbsolutePath() + " does not exist."); } StreamingPropertyValue spv = new StreamingPropertyValue(in, byte[].class); spv.searchIndex(false); spv.store(true); return spv; } private void importResource(Results results, Graph graph, Vertex outVertex, Statement statement, GraphPropertyWorkData data, Visibility visibility, Authorizations authorizations) { String label = statement.getPredicate().toString(); String vertexId = getGraphVertexId(statement.getResource()); VertexBuilder inVertexBuilder = graph.prepareVertex(vertexId, visibility); if (data != null) { data.setVisibilityJsonOnElement(inVertexBuilder); } Vertex inVertex = inVertexBuilder.save(authorizations); results.addVertex(inVertex); if (data != null) { addVertexToWorkspaceIfNeeded(data, inVertex); } String edgeId = outVertex.getId() + "_" + label + "_" + inVertex.getId(); EdgeBuilder e = graph.prepareEdge(edgeId, outVertex, inVertex, label, visibility); if (data != null) { data.setVisibilityJsonOnElement(e); } results.addEdge(e.save(authorizations)); LOGGER.info("importResource: %s = %s", label, vertexId); } /** * RDF requires that all subjects are URIs. To create more portable ids, * this method will look for the last '#' character and return everything after that. */ private String getGraphVertexId(Resource subject) { String subjectUri = subject.getURI(); checkNotNull(subjectUri, "could not get uri of subject: " + subject); int lastPound = subjectUri.lastIndexOf('#'); checkArgument(lastPound >= 1, "Could not find '#' in subject uri: " + subjectUri); return subjectUri.substring(lastPound + 1); } private static class Results { private final List<Vertex> vertices = new ArrayList<>(); private final List<Edge> edges = new ArrayList<>(); public void addEdge(Edge edge) { this.edges.add(edge); } public void addVertex(Vertex vertex) { this.vertices.add(vertex); } public Iterable<Edge> getEdges() { return edges; } public Iterable<Vertex> getVertices() { return vertices; } } }