/**
* This file is part of d:swarm graph extension.
*
* d:swarm graph extension is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* d:swarm graph extension is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with d:swarm graph extension. If not, see <http://www.gnu.org/licenses/>.
*/
package org.dswarm.graph.batch.parse;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.RDFS;
import org.neo4j.graphdb.DynamicLabel;
import org.neo4j.graphdb.DynamicRelationshipType;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.RelationshipType;
import org.neo4j.helpers.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.dswarm.graph.DMPGraphException;
import org.dswarm.graph.Neo4jProcessor;
import org.dswarm.graph.NodeType;
import org.dswarm.graph.batch.BatchNeo4jProcessor;
import org.dswarm.graph.model.GraphStatics;
import org.dswarm.graph.model.Statement;
import org.dswarm.graph.parse.Neo4jHandler;
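/**
* Base class for handlers that write statements into a Neo4j graph through a {@link BatchNeo4jProcessor}, i.e. nodes
* and relationships are created via the processor's batch inserter and registered in the processor's indices.
*
* @author tgaengler
*/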
public abstract class BaseNeo4jHandler implements Neo4jHandler {
private static final Logger LOG = LoggerFactory.getLogger(BaseNeo4jHandler.class);
// number of handled statements after which handleStatement logs a progress message
private static final int TX_CHUNK_SIZE = 200000;
// number of seconds after which handleStatement logs a progress message
private static final int TX_TIME_DELTA = 30;
// number of statements that were handled without an error
protected int totalTriples = 0;
// number of nodes created by this handler (subjects, objects and literals)
protected int addedNodes = 0;
// number of relationships created by addRelationship
protected int addedRelationships = 0;
// value of totalTriples at the time of the last progress message
protected int sinceLastCommit = 0;
// incremented on every handleStatement call; not read anywhere else in this class
protected int i = 0;
// number of literal nodes created by handleLiteral
protected int literals = 0;
// timestamp (millis) of handler creation or of the last progress message
protected long tick = System.currentTimeMillis();
// set via setResourceUri; not read in this class (presumably used by subclasses - verify)
protected String resourceUri;
// set via setResourceHash; not read in this class (presumably used by subclasses - verify)
protected long resourceHash;
// processor that provides the batch inserter, the indices and the hash generation
protected final BatchNeo4jProcessor processor;
// label that is put onto nodes created for type resources and type bnodes
protected static final Label rdfsClassLabel = DynamicLabel.label(RDFS.Class.getURI());
/**
 * Creates a handler that writes through the given batch processor.
 *
 * @param processorArg the batch processor that is used for all node, relationship and index operations
 * @throws DMPGraphException part of the declared signature; this constructor itself only stores the processor
 */
public BaseNeo4jHandler(final BatchNeo4jProcessor processorArg) throws DMPGraphException {
    this.processor = processorArg;
}
/**
 * @return the batch processor this handler writes through
 */
public Neo4jProcessor getProcessor() {
    return this.processor;
}
/**
 * Stores the given resource URI in this handler.
 *
 * @param resourceUriArg the resource URI to remember
 */
@Override
public void setResourceUri(final String resourceUriArg) {
    this.resourceUri = resourceUriArg;
}
/**
 * Stores the given resource hash in this handler.
 *
 * @param resourceHashArg the resource hash to remember
 */
@Override
public void setResourceHash(final long resourceHashArg) {
    this.resourceHash = resourceHashArg;
}
/**
 * Currently a no-op; this handler does not maintain a resource index counter yet.
 */
@Override
public void resetResourceIndexCounter() {
    // TODO: not implemented yet
}
/**
 * Writes one statement (subject, predicate, object) through the batch inserter of the processor.
 * <p>
 * The subject node is looked up via the processor and created if it is unknown. Literal objects are delegated to
 * {@link #handleLiteral(long, Statement, Optional)}. For all other objects the object node is looked up or created
 * and a relationship is added, unless the processor already knows a statement with the same hash. If the predicate
 * is {@code rdf:type}, the object URI is additionally passed to the processor as label for the subject node and the
 * object is treated as type resource / type bnode.
 * <p>
 * After a successfully handled statement the statement counter is increased and a progress message is logged every
 * {@code TX_CHUNK_SIZE} statements or every {@code TX_TIME_DELTA} seconds.
 *
 * @param statement the statement to write; subject node type, predicate URI and object node type must be present
 * @throws DMPGraphException if the statement is incomplete or if anything fails while writing it; the original
 *                           failure is attached as cause
 */
@Override
public void handleStatement(final Statement statement) throws DMPGraphException {

    // counts every invocation, including those that fail below
    i++;

    try {

        if (!statement.getOptionalSubjectNodeType().isPresent() || !statement.getOptionalPredicateURI().isPresent()
                || !statement.getOptionalObjectNodeType().isPresent()) {

            throw new DMPGraphException("cannot handle statement, because no subject node type or predicate uri or object node type is present");
        }

        final NodeType subjectNodeType = statement.getOptionalSubjectNodeType().get();
        final NodeType objectNodeType = statement.getOptionalObjectNodeType().get();
        final String predicateURI = statement.getOptionalPredicateURI().get();

        // hash over subject URI + data model URI; only available for subjects that have a URI
        final Optional<String> optionalSubjectURI = statement.getOptionalSubjectURI();
        final Optional<Long> optionalSubjectUriDataModelUriHash;

        if (optionalSubjectURI.isPresent()) {

            optionalSubjectUriDataModelUriHash = Optional
                    .of(processor.generateResourceHash(optionalSubjectURI.get(), statement.getOptionalSubjectDataModelURI()));
        } else {

            optionalSubjectUriDataModelUriHash = Optional.empty();
        }

        // TODO: what should we do, if the subject is a resource type?
        final long subjectNodeId = determineOrCreateSubjectNode(statement, subjectNodeType);

        if (NodeType.Literal.equals(objectNodeType)) {

            handleLiteral(subjectNodeId, statement, optionalSubjectUriDataModelUriHash);
        } else { // must be Resource or BNode

            // an rdf:type statement labels the subject with the object URI and turns the object into a type node
            boolean isType = false;

            if (predicateURI.equals(RDF.type.getURI())) {

                processor.addLabel(subjectNodeId, statement.getOptionalObjectURI().get());
                isType = true;
            }

            final NodeType finalObjectNodeType;

            if (!isType) {

                finalObjectNodeType = objectNodeType;
            } else {

                switch (objectNodeType) {

                    case Resource:

                        finalObjectNodeType = NodeType.TypeResource;

                        break;
                    case BNode:

                        finalObjectNodeType = NodeType.TypeBNode;

                        break;
                    default:

                        finalObjectNodeType = objectNodeType;
                }
            }

            final Optional<NodeType> finalOptionalObjectNodeType = Optional.of(finalObjectNodeType);

            // check index for object
            final Optional<Long> optionalObjectNodeId = processor.determineNode(finalOptionalObjectNodeType, statement.getOptionalObjectId(),
                    statement.getOptionalObjectURI(), statement.getOptionalObjectDataModelURI());
            final long objectNodeId;
            final Optional<Long> optionalResourceHash;

            if (optionalObjectNodeId.isPresent()) {

                objectNodeId = optionalObjectNodeId.get();
                optionalResourceHash = Optional.empty();
            } else {

                final Map<String, Object> objectNodeProperties = new HashMap<>();

                if (NodeType.Resource.equals(finalObjectNodeType) || NodeType.TypeResource.equals(finalObjectNodeType)) {

                    // object is a resource node
                    final String objectURI = statement.getOptionalObjectURI().get();

                    objectNodeProperties.put(GraphStatics.URI_PROPERTY, objectURI);

                    switch (finalObjectNodeType) {

                        case Resource:

                            objectNodeProperties.put(GraphStatics.NODETYPE_PROPERTY, NodeType.Resource.toString());
                            processor.handleObjectDataModel(objectNodeProperties, statement.getOptionalObjectDataModelURI());

                            objectNodeId = processor.getBatchInserter().createNode(objectNodeProperties);

                            break;
                        case TypeResource:

                            objectNodeProperties.put(GraphStatics.NODETYPE_PROPERTY, NodeType.TypeResource.toString());

                            objectNodeId = processor.getBatchInserter().createNode(objectNodeProperties, rdfsClassLabel);
                            processor.addToResourceTypesIndex(objectURI, objectNodeId);

                            break;
                        default:

                            throw new DMPGraphException("object must be a resource or a type resource at this moment");
                    }

                    processor.addToResourcesIndex(objectURI, objectNodeId);
                    processor.addObjectToResourceWDataModelIndex(objectNodeId, objectURI, statement.getOptionalObjectDataModelURI());
                    optionalResourceHash = Optional.empty();
                } else {

                    // object is a (type) blank node
                    final Pair<Long, Optional<Long>> result = handleBNode(subjectNodeId, statement, objectNodeProperties,
                            finalOptionalObjectNodeType, optionalSubjectUriDataModelUriHash);
                    objectNodeId = result.first();
                    optionalResourceHash = result.other();
                }

                addedNodes++;
            }

            final long hash = processor.generateStatementHash(subjectNodeId, predicateURI, objectNodeId, subjectNodeType,
                    finalObjectNodeType);

            final boolean statementExists = processor.checkStatementExists(hash);

            if (!statementExists) {

                // prefer the resource hash determined while creating the object node, fall back to the one of the statement
                final Optional<Long> finalOptionalResourceHash;

                if (!optionalResourceHash.isPresent()) {

                    finalOptionalResourceHash = statement.getOptionalResourceHash();
                } else {

                    finalOptionalResourceHash = optionalResourceHash;
                }

                addRelationship(subjectNodeId, predicateURI, objectNodeId, statement.getOptionalSubjectNodeType(),
                        optionalSubjectUriDataModelUriHash, statement.getOptionalStatementUUID(), finalOptionalResourceHash,
                        statement.getOptionalQualifiedAttributes(), hash);
            }
        }

        totalTriples++;

        logProgressIfDue();
    } catch (final Exception e) {

        final String message = "couldn't finish write \"TX\" successfully";

        BaseNeo4jHandler.LOG.error(message, e);

        // keep the root cause attached, so that callers can still see what actually went wrong
        final DMPGraphException wrapped = new DMPGraphException(message);
        wrapped.initCause(e);

        throw wrapped;
    }
}

/**
 * Looks up the subject node of the given statement via the processor and creates (and indexes) it if it is unknown.
 *
 * @param statement       the statement whose subject should be resolved
 * @param subjectNodeType the (present) subject node type of the statement
 * @return the id of the existing or newly created subject node
 * @throws DMPGraphException if the processor fails
 */
private long determineOrCreateSubjectNode(final Statement statement, final NodeType subjectNodeType) throws DMPGraphException {

    // check index for subject
    final Optional<Long> optionalSubjectNodeId = processor.determineNode(statement.getOptionalSubjectNodeType(),
            statement.getOptionalSubjectId(), statement.getOptionalSubjectURI(), statement.getOptionalSubjectDataModelURI());

    if (optionalSubjectNodeId.isPresent()) {

        return optionalSubjectNodeId.get();
    }

    final Map<String, Object> subjectNodeProperties = new HashMap<>();
    final long subjectNodeId;

    if (NodeType.Resource.equals(subjectNodeType) || NodeType.TypeResource.equals(subjectNodeType)) {

        // subject is a resource node
        final String subjectURI = statement.getOptionalSubjectURI().get();

        subjectNodeProperties.put(GraphStatics.URI_PROPERTY, subjectURI);
        subjectNodeProperties.put(GraphStatics.NODETYPE_PROPERTY, NodeType.Resource.toString());
        processor.handleSubjectDataModel(subjectNodeProperties, subjectURI, statement.getOptionalSubjectDataModelURI());

        subjectNodeId = processor.getBatchInserter().createNode(subjectNodeProperties);

        processor.addToResourcesIndex(subjectURI, subjectNodeId);
        processor.addObjectToResourceWDataModelIndex(subjectNodeId, subjectURI, statement.getOptionalSubjectDataModelURI());
    } else {

        // subject is a blank node
        // note: can I expect an id here?
        subjectNodeProperties.put(GraphStatics.NODETYPE_PROPERTY, NodeType.BNode.toString());

        subjectNodeId = processor.getBatchInserter().createNode(subjectNodeProperties);

        processor.addToBNodesIndex(statement.getOptionalSubjectId().get(), subjectNodeId);
    }

    addedNodes++;

    return subjectNodeId;
}

/**
 * Logs a progress message, if at least TX_CHUNK_SIZE statements were handled or at least TX_TIME_DELTA seconds
 * passed since the last progress message.
 */
private void logProgressIfDue() {

    final long nodeDelta = totalTriples - sinceLastCommit;
    final long elapsedMillis = System.currentTimeMillis() - tick;

    if (nodeDelta < TX_CHUNK_SIZE && elapsedMillis / 1000 < TX_TIME_DELTA) {

        return;
    }

    sinceLastCommit = totalTriples;

    // millisecond resolution with a floor of 1 ms, so that a chunk that completes within the same second
    // does not end up as "Infinity triples/second"
    final double triplesPerSecond = nodeDelta * 1000.0d / Math.max(elapsedMillis, 1L);

    BaseNeo4jHandler.LOG.debug("{} triples @ ~{} triples/second.", totalTriples, triplesPerSecond);

    tick = System.currentTimeMillis();
}
/**
 * Finishes writing: clears the maps of the processor and afterwards flushes its indices.
 *
 * @throws DMPGraphException if the processor fails
 */
@Override
public void closeTransaction() throws DMPGraphException {
    LOG.debug("close writing finally");

    this.processor.clearMaps();
    this.processor.flushIndices();
}
/**
 * @return the number of statements that were handled successfully so far
 */
@Override
public long getCountedStatements() {
    return this.totalTriples;
}
/**
 * @return the number of nodes that were created by this handler so far
 */
@Override
public int getNodesAdded() {
    return this.addedNodes;
}
/**
 * @return the number of relationships that were created by this handler so far
 */
@Override
public int getRelationshipsAdded() {
    return this.addedRelationships;
}
/**
 * @return the number of literal nodes that were created by this handler so far
 */
@Override
public int getCountedLiterals() {
    return this.literals;
}
/**
 * Creates the node for a blank node object and registers it in the bnodes index of the processor.
 * <p>
 * A type bnode is created with the rdfs:Class label and without a resource property. Every other bnode is created
 * without a label, after the resource hash (if one could be determined) was put into its properties.
 *
 * @param subjectNodeId          the id of the subject node of the statement
 * @param statement              the statement the object belongs to; its object id must be present
 * @param objectNodeProperties   the properties of the node to create; this map is modified
 * @param optionalObjectNodeType the node type of the object; must be present
 * @param optionalSubjectHash    the optional hash of the subject
 * @return the id of the created node together with the resource hash that was put into its properties (empty for
 * type bnodes or if no hash could be determined)
 * @throws DMPGraphException if no object node type is present
 */
public Pair<Long, Optional<Long>> handleBNode(final long subjectNodeId, final Statement statement,
        final Map<String, Object> objectNodeProperties, final Optional<NodeType> optionalObjectNodeType, final Optional<Long> optionalSubjectHash)
        throws DMPGraphException {
    final NodeType bnodeType = optionalObjectNodeType
            .orElseThrow(() -> new DMPGraphException("there is no object node type present"));

    objectNodeProperties.put(GraphStatics.NODETYPE_PROPERTY, bnodeType.toString());

    final Optional<Long> bnodeResourceHash;
    final long createdNodeId;

    if (NodeType.TypeBNode.equals(bnodeType)) {
        // type bnodes carry the class label instead of a resource property
        bnodeResourceHash = Optional.empty();
        createdNodeId = processor.getBatchInserter().createNode(objectNodeProperties, rdfsClassLabel);
    } else {
        bnodeResourceHash = addResourceProperty(subjectNodeId, objectNodeProperties, statement.getOptionalSubjectNodeType(),
                optionalSubjectHash, statement.getOptionalResourceHash());
        createdNodeId = processor.getBatchInserter().createNode(objectNodeProperties);
    }

    processor.addToBNodesIndex(statement.getOptionalObjectId().get(), createdNodeId);

    return Pair.of(createdNodeId, bnodeResourceHash);
}
/**
 * Writes a statement whose object is a literal: creates the literal node, adds it to the value index and connects
 * it to the subject. Nothing happens if the processor already knows a statement with the same hash.
 *
 * @param subjectNodeId       the id of the subject node
 * @param statement           the statement to write; predicate URI and object value must be present
 * @param optionalSubjectHash the optional hash of the subject
 * @throws DMPGraphException if the processor or the relationship creation fails
 */
public void handleLiteral(final long subjectNodeId, final Statement statement, final Optional<Long> optionalSubjectHash)
        throws DMPGraphException {
    final long statementHash = processor.generateStatementHash(subjectNodeId, statement);

    if (processor.checkStatementExists(statementHash)) {
        // statement is already known -> nothing to write
        return;
    }

    literals++;

    final Map<String, Object> literalProperties = new HashMap<>();
    literalProperties.put(GraphStatics.VALUE_PROPERTY, statement.getOptionalObjectValue().get());
    literalProperties.put(GraphStatics.NODETYPE_PROPERTY, NodeType.Literal.toString());

    final Optional<Long> literalResourceHash = addResourceProperty(subjectNodeId, literalProperties,
            statement.getOptionalSubjectNodeType(), optionalSubjectHash, statement.getOptionalResourceHash());

    final long literalNodeId = processor.getBatchInserter().createNode(literalProperties);
    processor.addToValueIndex(statement.getOptionalObjectValue().get(), literalNodeId);
    addedNodes++;

    addRelationship(subjectNodeId, statement.getOptionalPredicateURI().get(), literalNodeId, statement.getOptionalSubjectNodeType(),
            optionalSubjectHash, statement.getOptionalStatementUUID(), literalResourceHash,
            statement.getOptionalQualifiedAttributes(), statementHash);
}
/**
 * Creates the relationship for a statement and registers it in the statement indices of the processor.
 * <p>
 * TODO: refactor this to protected
 *
 * @param subjectNodeId               the id of the node the relationship starts at
 * @param predicateURI                the predicate URI; used as name of the relationship type
 * @param objectNodeId                the id of the node the relationship ends at
 * @param optionalSubjectNodeType     the optional node type of the subject
 * @param optionalSubjectURI          the optional hash of the subject (a hash, despite the parameter name)
 * @param optionalStatementUUID       the optional statement UUID; a random UUID is generated if it is absent
 * @param optionalResourceUri         the optional resource hash (a hash, despite the parameter name)
 * @param optionalQualifiedAttributes the optional qualified attributes of the statement
 * @param hash                        the hash of the statement
 * @return the id of the created relationship
 * @throws org.dswarm.graph.DMPGraphException if the processor fails
 */
public long addRelationship(final long subjectNodeId, final String predicateURI, final long objectNodeId,
        final Optional<NodeType> optionalSubjectNodeType, final Optional<Long> optionalSubjectURI,
        final Optional<String> optionalStatementUUID, final Optional<Long> optionalResourceUri,
        final Optional<Map<String, Object>> optionalQualifiedAttributes, final long hash) throws DMPGraphException {
    final String statementUUID = optionalStatementUUID.orElseGet(() -> UUID.randomUUID().toString());

    final Map<String, Object> relationshipProperties = processor.prepareRelationship(statementUUID, optionalQualifiedAttributes);
    addResourcePropertyToRelationship(subjectNodeId, relationshipProperties, optionalSubjectNodeType, optionalSubjectURI,
            optionalResourceUri);

    final RelationshipType relationshipType = DynamicRelationshipType.withName(predicateURI);
    final long relationshipId = processor.getBatchInserter()
            .createRelationship(subjectNodeId, objectNodeId, relationshipType, relationshipProperties);

    // TODO: for now we only keep the hash
    processor.addToStatementIndex(hash);
    processor.addStatementToIndex(relationshipId, statementUUID);

    addedRelationships++;

    return relationshipId;
}
/**
 * Lets the processor determine the resource hash and, if there is one, puts it as resource property into the given
 * node properties.
 *
 * @param subjectNodeId           the id of the subject node
 * @param objectProperties        the node properties to extend; this map is modified
 * @param optionalSubjectNodeType the optional node type of the subject
 * @param optionalSubjectHash     the optional hash of the subject
 * @param optionalResourceHash    the optional resource hash of the statement
 * @return the determined resource hash, or an empty optional if none could be determined
 */
protected Optional<Long> addResourceProperty(final long subjectNodeId, final Map<String, Object> objectProperties,
        final Optional<NodeType> optionalSubjectNodeType, final Optional<Long> optionalSubjectHash, final Optional<Long> optionalResourceHash) {
    final Optional<Long> determinedHash = processor.determineResourceHash(subjectNodeId, optionalSubjectNodeType, optionalSubjectHash,
            optionalResourceHash);

    determinedHash.ifPresent(hashValue -> objectProperties.put(GraphStatics.RESOURCE_PROPERTY, hashValue));

    return determinedHash;
}
/**
 * Puts the resource hash as resource property into the given relationship properties. A present
 * {@code optionalResourceHash} is used as is; otherwise the processor is asked to determine the hash.
 *
 * @param subjectNodeId           the id of the subject node
 * @param relProperties           the relationship properties to extend; this map is modified
 * @param optionalSubjectNodeType the optional node type of the subject
 * @param optionalSubjectHash     the optional hash of the subject
 * @param optionalResourceHash    the optional, already known resource hash
 * @return the resource hash that was put into the properties, or an empty optional if there is none
 */
protected Optional<Long> addResourcePropertyToRelationship(final long subjectNodeId, final Map<String, Object> relProperties,
        final Optional<NodeType> optionalSubjectNodeType, final Optional<Long> optionalSubjectHash, final Optional<Long> optionalResourceHash) {
    final Optional<Long> effectiveHash = optionalResourceHash.isPresent()
            ? optionalResourceHash
            : processor.determineResourceHash(subjectNodeId, optionalSubjectNodeType, optionalSubjectHash, optionalResourceHash);

    effectiveHash.ifPresent(hashValue -> relProperties.put(GraphStatics.RESOURCE_PROPERTY, hashValue));

    return effectiveHash;
}
/**
 * Registers the given node in the bnodes index of the processor, if its node type is {@code BNode}. Nodes of any
 * other type are ignored.
 *
 * @param optionalNodeId   the identifier of the bnode; must be present
 * @param optionalNodeType the node type; must be present
 * @param nodeId           the id of the node in the graph
 * @throws DMPGraphException if the node identifier or the node type is not present
 */
public void addBNode(final Optional<String> optionalNodeId, final Optional<NodeType> optionalNodeType, final long nodeId)
        throws DMPGraphException {
    if (!(optionalNodeId.isPresent() && optionalNodeType.isPresent())) {
        throw new DMPGraphException("cannot add bnode, because the node id or node type is not present");
    }

    if (NodeType.BNode.equals(optionalNodeType.get())) {
        processor.addToBNodesIndex(optionalNodeId.get(), nodeId);
    }
}
}