/**
 * This file is part of d:swarm graph extension.
 *
 * d:swarm graph extension is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * d:swarm graph extension is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with d:swarm graph extension. If not, see <http://www.gnu.org/licenses/>.
 */
package org.dswarm.graph.parse;

import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.jena.vocabulary.RDF;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.dswarm.graph.BasicNeo4jProcessor;
import org.dswarm.graph.DMPGraphException;
import org.dswarm.graph.Neo4jProcessor;
import org.dswarm.graph.NodeType;
import org.dswarm.graph.hash.HashUtils;
import org.dswarm.graph.model.GraphStatics;
import org.dswarm.graph.model.Statement;
import org.dswarm.graph.versioning.VersionHandler;
import org.dswarm.graph.versioning.VersioningStatics;

/**
 * Base handler that writes {@link Statement}s into the graph database through a {@link BasicNeo4jProcessor} and that can
 * deprecate already written statements.
 * <p>
 * The handler keeps simple counters (handled statements, created nodes, created relationships, created literals) and renews
 * the write transaction of the processor after {@value #TX_CHUNK_SIZE} handled statements or after {@value #TX_TIME_DELTA}
 * seconds, whichever comes first.
 * <p>
 * The counters are plain (non-atomic) fields; nothing in this class synchronises access to them.
 *
 * @author tgaengler
 */
public abstract class BaseNeo4jHandler implements Neo4jHandler, Neo4jUpdateHandler {

	private static final Logger	LOG	= LoggerFactory.getLogger(BaseNeo4jHandler.class);

	/** Number of handled statements after which the write transaction is renewed. */
	private static final int	TX_CHUNK_SIZE	= 50000;

	/** Number of seconds after which the write transaction is renewed. */
	private static final int	TX_TIME_DELTA	= 30;

	/** Number of statements that were handled completely by {@link #handleStatement(Statement)}. */
	protected int	totalTriples		= 0;

	/** Number of nodes created by this handler. */
	protected int	addedNodes			= 0;

	/** Number of relationships created by {@link #addRelationship}. */
	protected int	addedRelationships	= 0;

	/** Value of {@link #totalTriples} at the moment of the last transaction renewal. */
	protected int	sinceLastCommit		= 0;

	/** Number of {@link #handleStatement(Statement)} invocations (incremented before any processing happens). */
	protected int	i					= 0;

	/** Number of literal nodes created by {@link #handleLiteral}. */
	protected int	literals			= 0;

	/** Timestamp (milliseconds) of the handler creation or of the last transaction renewal. */
	protected long	tick				= System.currentTimeMillis();

	protected String		resourceUri;
	protected long			resourceHash;

	/** Counter whose incremented value is handed to the processor for every created relationship. */
	protected AtomicLong	resourceIndexCounter	= new AtomicLong(0);

	// TODO: init
	protected VersionHandler	versionHandler	= null;

	protected final BasicNeo4jProcessor	processor;

	protected final boolean	enableVersioning;

	/**
	 * Creates a new handler and calls {@link #init()}.
	 * <p>
	 * Note: {@link #init()} is implemented by subclasses and runs before the constructor body of the subclass is executed,
	 * i.e., it can only rely on {@link #processor} and {@link #enableVersioning}.
	 *
	 * @param processorArg the processor that is utilised for all graph database operations
	 * @param enableVersioningArg the versioning flag that is stored in {@link #enableVersioning}
	 * @throws DMPGraphException if {@link #init()} fails
	 */
	public BaseNeo4jHandler(final BasicNeo4jProcessor processorArg, final boolean enableVersioningArg) throws DMPGraphException {

		processor = processorArg;
		enableVersioning = enableVersioningArg;

		init();
	}

	@Override
	public Neo4jProcessor getProcessor() {

		return processor;
	}

	@Override
	public void setResourceUri(final String resourceUriArg) throws DMPGraphException {

		resourceUri = resourceUriArg;
	}

	@Override
	public void setResourceHash(final long resourceHashArg) {

		resourceHash = resourceHashArg;
	}

	/**
	 * Replaces the resource index counter by a fresh counter that starts at 0.
	 */
	@Override
	public void resetResourceIndexCounter() {

		resourceIndexCounter = new AtomicLong(0);
	}

	@Override
	public VersionHandler getVersionHandler() {

		return versionHandler;
	}

	/**
	 * Writes a single statement into the graph database.
	 * <p>
	 * The subject node is looked up via the processor and created if it cannot be determined. Literal objects are delegated
	 * to {@link #handleLiteral}; for all other objects the object node is looked up or created and - unless the statement is
	 * an rdf:type statement or its hash is already known to the processor - connected via {@link #addRelationship}. For
	 * rdf:type statements the prefixed object URI is added as label to the subject node instead.
	 * <p>
	 * After the statement was handled, the write transaction is renewed if {@value #TX_CHUNK_SIZE} statements were handled or
	 * {@value #TX_TIME_DELTA} seconds elapsed since the last renewal.
	 *
	 * @param statement the statement that should be written
	 * @throws DMPGraphException if anything goes wrong while handling the statement; the transaction is marked as failed
	 *             before and the original exception is attached as cause
	 */
	@Override
	public void handleStatement(Statement statement) throws DMPGraphException {

		// utilise r for the resource property

		i++;

		processor.ensureRunningTx();

		try {

			final Optional<String> optionalPredicateURI = statement.getOptionalPredicateURI();

			if (!statement.getOptionalSubjectNodeType().isPresent() || !optionalPredicateURI.isPresent()
					|| !statement.getOptionalObjectNodeType().isPresent()) {

				throw new DMPGraphException("cannot handle statement, because no subject node type or predicate uri or object node type is present");
			}

			final NodeType subjectNodeType = statement.getOptionalSubjectNodeType().get();
			final NodeType objectNodeType = statement.getOptionalObjectNodeType().get();

			// Check index for subject
			// TODO: what should we do, if the subject is a resource type?
			final Optional<String> optionalPrefixedSubjectURI = processor.optionalCreatePrefixedURI(statement.getOptionalSubjectURI());
			final Optional<String> optionalPrefixedSubjectDataModelURI = processor
					.optionalCreatePrefixedURI(statement.getOptionalSubjectDataModelURI());
			final Optional<Long> optionalSubjectUriDataModelUriHash;

			if (optionalPrefixedSubjectURI.isPresent()) {

				optionalSubjectUriDataModelUriHash = Optional
						.of(processor.generateResourceHash(optionalPrefixedSubjectURI.get(), optionalPrefixedSubjectDataModelURI));
			} else {

				optionalSubjectUriDataModelUriHash = Optional.empty();
			}

			final Optional<Node> optionalSubjectNode = processor.determineNode(statement.getOptionalSubjectNodeType(),
					statement.getOptionalSubjectId(), optionalPrefixedSubjectURI, optionalPrefixedSubjectDataModelURI,
					optionalSubjectUriDataModelUriHash);
			final Node subjectNode;

			if (optionalSubjectNode.isPresent()) {

				subjectNode = optionalSubjectNode.get();
			} else {

				final Label subjectLabel = processor.getLabel(subjectNodeType.toString());

				subjectNode = processor.getDatabase().createNode(subjectLabel);

				if (NodeType.Resource.equals(subjectNodeType) || NodeType.TypeResource.equals(subjectNodeType)) {

					if (NodeType.TypeResource.equals(subjectNodeType)) {

						processor.addLabel(subjectNode, NodeType.Resource.toString());
					}

					// subject is a resource node

					final String subjectURI = optionalPrefixedSubjectURI.get();

					subjectNode.setProperty(GraphStatics.URI_PROPERTY, subjectURI);
					subjectNode.setProperty(GraphStatics.HASH, optionalSubjectUriDataModelUriHash.get());

					if (resourceHash == optionalSubjectUriDataModelUriHash.get()) {

						versionHandler.setLatestVersion(optionalPrefixedSubjectDataModelURI);
					}

					processor.handleSubjectDataModel(subjectNode, subjectURI, optionalPrefixedSubjectDataModelURI);

					processor.addNodeToResourcesIndex(subjectURI, subjectNode);
				} else {

					// subject is a blank node

					// note: can I expect an id here?
					processor.addNodeToBNodesIndex(statement.getOptionalSubjectId().get(), subjectNode);
				}

				addedNodes++;
			}

			// fall back to the resource hash of this handler, if the statement doesn't carry its own
			final Optional<Long> optionalStatementResourceHash;

			if (!statement.getOptionalResourceHash().isPresent()) {

				optionalStatementResourceHash = Optional.of(resourceHash);
			} else {

				optionalStatementResourceHash = statement.getOptionalResourceHash();
			}

			final Optional<String> optionalPrefixedPredicateURI = processor.optionalCreatePrefixedURI(optionalPredicateURI);

			if (NodeType.Literal.equals(objectNodeType)) {

				handleLiteral(subjectNode, statement, optionalSubjectUriDataModelUriHash, optionalStatementResourceHash,
						optionalPrefixedPredicateURI);
			} else { // must be Resource
				// Make sure object exists

				final Optional<String> optionalPrefixedObjectURI = processor.optionalCreatePrefixedURI(statement.getOptionalObjectURI());

				boolean isType = false;

				// add Label to subject node, if object is a type entry
				if (optionalPrefixedObjectURI.isPresent() && optionalPredicateURI.get().equals(RDF.type.getURI())) {

					processor.addLabel(subjectNode, optionalPrefixedObjectURI.get());

					isType = true;
				}

				final NodeType finalObjectNodeType;

				if (!isType) {

					finalObjectNodeType = objectNodeType;
				} else {

					// correct/enhance node type
					switch (objectNodeType) {

						case Resource:

							finalObjectNodeType = NodeType.TypeResource;

							break;
						case BNode:

							finalObjectNodeType = NodeType.TypeBNode;

							break;
						default:

							finalObjectNodeType = objectNodeType;
					}
				}

				final Optional<String> optionalPrefixedObjectDataModelURI = processor
						.optionalCreatePrefixedURI(statement.getOptionalObjectDataModelURI());
				final Optional<NodeType> finalOptionalObjectNodeType = Optional.of(finalObjectNodeType);
				final Optional<Long> optionalObjectResourceUriDataModelUriHash;

				if (optionalPrefixedObjectURI.isPresent()) {

					// because type resources doesn't belong to any data model
					optionalObjectResourceUriDataModelUriHash = Optional.of(HashUtils.generateHash(optionalPrefixedObjectURI.get()));
				} else {

					optionalObjectResourceUriDataModelUriHash = Optional.empty();
				}

				// Check index for object
				final Optional<Node> optionalObjectNode = processor.determineNode(finalOptionalObjectNodeType, statement.getOptionalObjectId(),
						optionalPrefixedObjectURI, optionalPrefixedObjectDataModelURI, optionalObjectResourceUriDataModelUriHash);
				final Node objectNode;
				final Optional<Long> optionalResourceHash;

				if (optionalObjectNode.isPresent()) {

					objectNode = optionalObjectNode.get();
					optionalResourceHash = Optional.empty();
				} else {

					final Label objectLabel = processor.getLabel(finalObjectNodeType.toString());

					objectNode = processor.getDatabase().createNode(objectLabel);

					if (NodeType.Resource.equals(finalObjectNodeType) || NodeType.TypeResource.equals(finalObjectNodeType)) {

						// object is a resource node

						final String objectURI = optionalPrefixedObjectURI.get();

						objectNode.setProperty(GraphStatics.URI_PROPERTY, objectURI);
						objectNode.setProperty(GraphStatics.HASH, optionalObjectResourceUriDataModelUriHash.get());

						switch (finalObjectNodeType) {

							case Resource:

								processor.handleObjectDataModel(objectNode, optionalPrefixedObjectDataModelURI);

								break;
							case TypeResource:

								processor.addLabel(objectNode, processor.getNamespaceIndex().getRDFCLASSPrefixedURI());
								processor.addLabel(objectNode, NodeType.Resource.toString());

								processor.addNodeToResourceTypesIndex(objectURI, objectNode);

								break;
						}

						processor.addObjectToResourceWDataModelIndex(objectNode, objectURI, optionalPrefixedObjectDataModelURI);
						optionalResourceHash = Optional.empty();
					} else {

						optionalResourceHash = handleBNode(subjectNode, statement, objectNode, finalOptionalObjectNodeType,
								optionalSubjectUriDataModelUriHash, optionalStatementResourceHash);
					}

					addedNodes++;
				}

				// leave out, rdf:type statements for now (enable, them if footprint is not too high)
				if (!isType) {

					final long hash = processor.generateStatementHash(subjectNode, optionalPrefixedPredicateURI.get(), objectNode,
							subjectNodeType, finalObjectNodeType);

					final boolean statementExists = processor.checkStatementExists(hash);

					if (!statementExists) {

						final Optional<Long> finalOptionalResourceHash;

						if (!optionalResourceHash.isPresent()) {

							finalOptionalResourceHash = optionalStatementResourceHash;
						} else {

							finalOptionalResourceHash = optionalResourceHash;
						}

						addRelationship(subjectNode, optionalPrefixedPredicateURI.get(), objectNode, statement.getOptionalSubjectNodeType(),
								optionalSubjectUriDataModelUriHash, statement.getOptionalStatementUUID(), finalOptionalResourceHash,
								statement.getOptionalQualifiedAttributes(), hash);
					}
				}
			}

			totalTriples++;

			final long nodeDelta = totalTriples - sinceLastCommit;
			final long elapsedMillis = System.currentTimeMillis() - tick;
			final long timeDelta = elapsedMillis / 1000;

			if (nodeDelta >= TX_CHUNK_SIZE || timeDelta >= TX_TIME_DELTA) { // Commit every 50k operations or every 30 seconds

				processor.renewTx();

				sinceLastCommit = totalTriples;

				// calculate the throughput on millisecond basis (with a lower bound of 1ms); dividing by the number of whole
				// seconds would be a division by zero (i.e. "Infinity" in the log), if a chunk was written in less than a second
				final double duration = nodeDelta * 1000.0 / Math.max(elapsedMillis, 1L);

				LOG.debug("{} triples @ ~{} triples/second.", totalTriples, duration);

				tick = System.currentTimeMillis();
			}
		} catch (final Exception e) {

			final String message = "couldn't finish write TX successfully";

			LOG.error(message, e);

			processor.failTx();

			throw createException(message, e);
		}
	}

	// @Override
	// public void deprecateStatement(long index) {
	//
	// throw new NotImplementedException();
	// }

	/**
	 * Deprecates the statement with the given uuid, i.e., sets the latest version of the version handler as "valid to"
	 * property at the relationship and removes the hash of the statement from the statement hashes index.
	 *
	 * @param uuid the uuid (hash) of the statement, as it is utilised in the statement index of the processor
	 * @return the deprecated relationship
	 * @throws DMPGraphException if the statement could not be found or could not be deprecated; the transaction is marked as
	 *             failed before and the original exception is attached as cause
	 */
	@Override
	public Relationship deprecateStatement(final Long uuid) throws DMPGraphException {

		processor.ensureRunningTx();

		try {

			final Optional<Relationship> optionalRel = processor.getRelationshipFromStatementIndex(uuid);

			if (!optionalRel.isPresent()) {

				// fail with a meaningful reason instead of an anonymous NoSuchElementException from Optional#get; the catch
				// block below logs it and keeps it as cause
				throw new DMPGraphException("couldn't find statement with the uuid '" + uuid + "' in the database");
			}

			final Relationship rel = optionalRel.get();

			rel.setProperty(VersioningStatics.VALID_TO_PROPERTY, versionHandler.getLatestVersion());

			// remove statement hash from statement hashes index
			final long statementHash = processor.generateStatementHash(rel);
			processor.removeHashFromStatementIndex(statementHash);

			return rel;
		} catch (final Exception e) {

			final String message = "couldn't deprecate statement successfully";

			processor.failTx();

			BaseNeo4jHandler.LOG.error(message, e);
			BaseNeo4jHandler.LOG.debug("couldn't finish write TX successfully");

			throw createException(message, e);
		}
	}

	/**
	 * Marks the write transaction as successful and clears the maps of the processor.
	 *
	 * @throws DMPGraphException if the processor fails to do so
	 */
	@Override
	public void closeTransaction() throws DMPGraphException {

		LOG.debug("close write TX finally");

		processor.succeedTx();
		processor.clearMaps();
	}

	@Override
	public long getCountedStatements() {

		return totalTriples;
	}

	@Override
	public int getNodesAdded() {

		return addedNodes;
	}

	@Override
	public int getRelationshipsAdded() {

		return addedRelationships;
	}

	@Override
	public int getCountedLiterals() {

		return literals;
	}

	/**
	 * Initialises the handler. Is called from the constructor of this class.
	 *
	 * @throws DMPGraphException if the initialisation fails
	 */
	protected abstract void init() throws DMPGraphException;

	/**
	 * Registers a freshly created blank node object at the bnodes index of the processor. Type bnodes additionally get the
	 * rdfs:class and bnode labels; all other bnodes get the resource property (see
	 * {@link #addResourceProperty(Node, Node, Optional, Optional, Optional)}).
	 *
	 * @param subjectNode the subject node of the statement
	 * @param statement the statement; its object id must be present
	 * @param objectNode the freshly created object node
	 * @param optionalObjectNodeType the (corrected) node type of the object; must be present
	 * @param optionalSubjectHash the optional hash of the subject
	 * @param optionalResourceHash the optional resource hash of the statement
	 * @return the resource hash that was set at the object node, or an empty optional for type bnodes or if no resource hash
	 *         could be determined
	 * @throws DMPGraphException if the object node type is not present
	 */
	public Optional<Long> handleBNode(final Node subjectNode, final Statement statement, final Node objectNode,
			final Optional<NodeType> optionalObjectNodeType, final Optional<Long> optionalSubjectHash, final Optional<Long> optionalResourceHash)
			throws DMPGraphException {

		if (!optionalObjectNodeType.isPresent()) {

			throw new DMPGraphException("there is no object node type present");
		}

		final Optional<Long> finalOptionalResourceHash;

		// object is a blank node

		processor.addNodeToBNodesIndex(statement.getOptionalObjectId().get(), objectNode);

		final NodeType objectNodeType = optionalObjectNodeType.get();

		if (!NodeType.TypeBNode.equals(objectNodeType)) {

			finalOptionalResourceHash = addResourceProperty(subjectNode, objectNode, statement.getOptionalSubjectNodeType(), optionalSubjectHash,
					optionalResourceHash);
		} else {

			processor.addLabel(objectNode, processor.getNamespaceIndex().getRDFCLASSPrefixedURI());
			processor.addLabel(objectNode, NodeType.BNode.toString());
			finalOptionalResourceHash = Optional.empty();
		}

		return finalOptionalResourceHash;
	}

	/**
	 * Writes a statement with a literal object: unless the hash of the statement is already known to the processor, a new
	 * literal node with the object value is created and connected to the subject node.
	 *
	 * @param subjectNode the subject node of the statement
	 * @param statement the statement; its object value must be present
	 * @param optionalSubjectHash the optional hash of the subject
	 * @param optionalResourceHash the optional resource hash of the statement
	 * @param optionalPrefixedPredicateURI the prefixed predicate URI; must be present
	 * @throws DMPGraphException if the statement could not be written
	 */
	public void handleLiteral(final Node subjectNode, final Statement statement, final Optional<Long> optionalSubjectHash,
			final Optional<Long> optionalResourceHash, final Optional<String> optionalPrefixedPredicateURI) throws DMPGraphException {

		final long hash = processor.generateStatementHash(subjectNode, statement, optionalPrefixedPredicateURI);

		final boolean statementExists = processor.checkStatementExists(hash);

		if (!statementExists) {

			literals++;

			final Label objectLabel = processor.getLabel(NodeType.Literal.toString());

			final Node objectNode = processor.getDatabase().createNode(objectLabel);
			objectNode.setProperty(GraphStatics.VALUE_PROPERTY, statement.getOptionalObjectValue().get());
			//objectNode.setProperty(GraphStatics.NODETYPE_PROPERTY, NodeType.Literal.toString());
			//processor.addNodeToValueIndex(objectNode, GraphStatics.VALUE, statement.getOptionalObjectValue().get());

			final Optional<Long> finalOptionalResourceHash = addResourceProperty(subjectNode, objectNode,
					statement.getOptionalSubjectNodeType(), optionalSubjectHash, optionalResourceHash);

			addedNodes++;

			addRelationship(subjectNode, optionalPrefixedPredicateURI.get(), objectNode, statement.getOptionalSubjectNodeType(),
					optionalSubjectHash, statement.getOptionalStatementUUID(), finalOptionalResourceHash,
					statement.getOptionalQualifiedAttributes(), hash);
		}
	}

	/**
	 * Creates the relationship between the given subject and object node, and registers the statement hash and the
	 * relationship at the indices of the processor.
	 * <p>
	 * TODO: refactor this to protected
	 *
	 * @param subjectNode the start node of the relationship
	 * @param predicateURI the (prefixed) predicate URI
	 * @param objectNode the end node of the relationship
	 * @param optionalSubjectNodeType the optional node type of the subject
	 * @param optionalSubjectHash the optional hash of the subject
	 * @param optionalStatementUUID the optional statement uuid; a random uuid is generated, if it is absent
	 * @param optionalResourceHash the optional resource hash that should be set at the relationship
	 * @param optionalQualifiedAttributes the optional qualified attributes of the statement
	 * @param hash the statement hash
	 * @return the created relationship
	 * @throws DMPGraphException if the relationship could not be created
	 */
	public Relationship addRelationship(final Node subjectNode, final String predicateURI, final Node objectNode,
			final Optional<NodeType> optionalSubjectNodeType, final Optional<Long> optionalSubjectHash,
			final Optional<String> optionalStatementUUID, final Optional<Long> optionalResourceHash,
			final Optional<Map<String, Object>> optionalQualifiedAttributes, final long hash) throws DMPGraphException {

		final String finalStatementUUID;

		if (optionalStatementUUID.isPresent()) {

			finalStatementUUID = optionalStatementUUID.get();
		} else {

			finalStatementUUID = UUID.randomUUID().toString();
		}

		final long statementUUIDHash = HashUtils.getUUID(finalStatementUUID);

		final Relationship rel = processor.prepareRelationship(subjectNode, predicateURI, objectNode, statementUUIDHash,
				optionalQualifiedAttributes, Optional.of(resourceIndexCounter.incrementAndGet()), versionHandler);

		processor.addHashToStatementIndex(hash);
		processor.addStatementToIndex(rel, statementUUIDHash);

		addedRelationships++;

		addResourceProperty(subjectNode, rel, optionalSubjectNodeType, optionalSubjectHash, optionalResourceHash);

		return rel;
	}

	/**
	 * Sets the resource hash, as it is determined by the processor, as resource property at the given object node.
	 *
	 * @param subjectNode the subject node of the statement
	 * @param objectNode the node that should receive the resource property
	 * @param optionalSubjectNodeType the optional node type of the subject
	 * @param optionalSubjectHash the optional hash of the subject
	 * @param optionalResourceHash the optional resource hash of the statement
	 * @return the resource hash that was set, or an empty optional (and an untouched node), if none could be determined
	 */
	protected Optional<Long> addResourceProperty(final Node subjectNode, final Node objectNode, final Optional<NodeType> optionalSubjectNodeType,
			final Optional<Long> optionalSubjectHash, final Optional<Long> optionalResourceHash) {

		final Optional<Long> finalOptionalResourceHash = processor.determineResourceHash(subjectNode, optionalSubjectNodeType,
				optionalSubjectHash, optionalResourceHash);

		if (!finalOptionalResourceHash.isPresent()) {

			return Optional.empty();
		}

		objectNode.setProperty(GraphStatics.RESOURCE_PROPERTY, finalOptionalResourceHash.get());

		return finalOptionalResourceHash;
	}

	/**
	 * Sets the resource hash as resource property at the given relationship. A given resource hash is taken as it is;
	 * otherwise the processor is asked to determine it.
	 *
	 * @param subjectNode the subject node of the statement
	 * @param rel the relationship that should receive the resource property
	 * @param optionalSubjectNodeType the optional node type of the subject
	 * @param optionalSubjectHash the optional hash of the subject
	 * @param optionalResourceHash the optional resource hash of the statement
	 * @return the resource hash that was set, or an empty optional (and an untouched relationship), if none could be
	 *         determined
	 */
	protected Optional<Long> addResourceProperty(final Node subjectNode, final Relationship rel, final Optional<NodeType> optionalSubjectNodeType,
			final Optional<Long> optionalSubjectHash, final Optional<Long> optionalResourceHash) {

		final Optional<Long> finalOptionalResourceHash;

		if (optionalResourceHash.isPresent()) {

			finalOptionalResourceHash = optionalResourceHash;
		} else {

			finalOptionalResourceHash = processor.determineResourceHash(subjectNode, optionalSubjectNodeType, optionalSubjectHash,
					optionalResourceHash);
		}

		if (finalOptionalResourceHash.isPresent()) {

			rel.setProperty(GraphStatics.RESOURCE_PROPERTY, finalOptionalResourceHash.get());
		}

		return finalOptionalResourceHash;
	}

	/**
	 * Adds the given node to the bnodes index of the processor, if its node type is {@link NodeType#BNode}. Nodes of any
	 * other type are ignored.
	 *
	 * @param optionalNodeId the id of the node; must be present
	 * @param optionalNodeType the type of the node; must be present
	 * @param node the node that should be added
	 * @throws DMPGraphException if the node id or the node type is not present
	 */
	public void addBNode(final Optional<String> optionalNodeId, final Optional<NodeType> optionalNodeType, final Node node)
			throws DMPGraphException {

		if (!optionalNodeId.isPresent() || !optionalNodeType.isPresent()) {

			throw new DMPGraphException("cannot add bnode, because the node id or node type is not present");
		}

		switch (optionalNodeType.get()) {

			case BNode:

				processor.addNodeToBNodesIndex(optionalNodeId.get(), node);

				break;
			default:

				// only plain bnodes are kept in the bnodes index

				break;
		}
	}

	/**
	 * Creates a {@link DMPGraphException} with the given message that keeps the given exception as cause, so that the root
	 * cause is not only available in the log, but also to the caller.
	 *
	 * @param message the message of the exception
	 * @param cause the exception that caused the failure
	 * @return the exception that should be thrown
	 */
	private static DMPGraphException createException(final String message, final Exception cause) {

		final DMPGraphException exception = new DMPGraphException(message);

		// NOTE(review): initCause is utilised, because only the single-argument constructor of DMPGraphException is visible
		// from here - switch to a (message, cause) constructor, if one exists
		exception.initCause(cause);

		return exception;
	}
}