/**
* This file is part of d:swarm graph extension.
*
* d:swarm graph extension is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* d:swarm graph extension is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with d:swarm graph extension. If not, see <http://www.gnu.org/licenses/>.
*/
package org.dswarm.graph;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.Optional;
import com.carrotsearch.hppc.LongLongMap;
import com.carrotsearch.hppc.LongLongOpenHashMap;
import com.google.common.collect.Maps;
import org.mapdb.DB;
import org.neo4j.graphdb.DynamicLabel;
import org.neo4j.graphdb.DynamicRelationshipType;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.RelationshipType;
import org.neo4j.graphdb.index.Index;
import org.neo4j.graphdb.index.IndexHits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.dswarm.common.types.Tuple;
import org.dswarm.graph.hash.HashUtils;
import org.dswarm.graph.index.MapDBUtils;
import org.dswarm.graph.index.NamespaceIndex;
import org.dswarm.graph.model.GraphStatics;
import org.dswarm.graph.model.Statement;
import org.dswarm.graph.tx.TransactionHandler;
import org.dswarm.graph.utils.GraphDatabaseUtils;
import org.dswarm.graph.utils.GraphUtils;
import org.dswarm.graph.versioning.VersionHandler;
/**
* @author tgaengler
*/
public abstract class BasicNeo4jProcessor implements TransactionalNeo4jProcessor {
private static final Logger LOG = LoggerFactory.getLogger(BasicNeo4jProcessor.class);
protected int addedLabels = 0;
protected final GraphDatabaseService database;
private Index<Relationship> statementUUIDs;
protected final Map<String, Node> bnodes;
// protected Index<Relationship> statementHashes;
final private Set<Long> statementHashes;
final private DB statementHashesDB;
final private Set<Long> tempStatementHashes;
final private DB tempStatementHashesDB;
private final NamespaceIndex namespaceIndex;
protected final LongLongMap nodeResourceMap;
// TODO: go offheap, if maps get to big
final private Map<String, Node> tempResourcesIndex;
final private Map<Long, Node> tempResourcesWDataModelIndex;
final private Map<String, Node> tempResourceTypesIndex;
private final Map<String, Label> labelCache;
protected final TransactionHandler tx;
public BasicNeo4jProcessor(final GraphDatabaseService database, final TransactionHandler txArg, final NamespaceIndex namespaceIndexArg) throws DMPGraphException {
this.database = database;
tx = txArg;
namespaceIndex = namespaceIndexArg;
tempResourcesIndex = Maps.newHashMap();
tempResourcesWDataModelIndex = Maps.newHashMap();
tempResourceTypesIndex = Maps.newHashMap();
labelCache = Maps.newHashMap();
beginTx();
LOG.debug("start write TX");
bnodes = new HashMap<>();
nodeResourceMap = new LongLongOpenHashMap();
try {
final Tuple<Set<Long>, DB> mapDBTuple = MapDBUtils
.createOrGetInMemoryLongIndexTreeSetNonTransactional(GraphIndexStatics.TEMP_STATEMENT_HASHES_INDEX_NAME);
tempStatementHashes = mapDBTuple.v1();
tempStatementHashesDB = mapDBTuple.v2();
final Tuple<Set<Long>, DB> mapDBTuple2 = getOrCreateLongIndex(GraphIndexStatics.STATEMENT_HASHES_INDEX_NAME);
statementHashes = mapDBTuple2.v1();
statementHashesDB = mapDBTuple2.v2();
} catch (final IOException e) {
failTx();
throw new DMPGraphException("couldn't create or get statement hashes index");
}
}
protected void initIndices() throws DMPGraphException {
try {
statementUUIDs = database.index().forRelationships(GraphIndexStatics.STATEMENT_UUIDS_INDEX_NAME);
tempResourcesIndex.clear();
tempResourcesWDataModelIndex.clear();
tempResourceTypesIndex.clear();
if (tempStatementHashes != null) {
tempStatementHashes.clear();
}
} catch (final Exception e) {
failTx();
final String message = "couldn't load indices successfully";
BasicNeo4jProcessor.LOG.error(message, e);
BasicNeo4jProcessor.LOG.debug("couldn't finish write TX successfully");
throw new DMPGraphException(message);
}
}
@Override
public GraphDatabaseService getDatabase() {
return database;
}
@Override public NamespaceIndex getNamespaceIndex() {
return namespaceIndex;
}
public void addNodeToBNodesIndex(final String key, final Node bnode) {
bnodes.put(key, bnode);
}
// public void addNodeToValueIndex(final Node literal, final String key, final String value) {
//
// values.putIfAbsent(literal, key, value);
// }
public void addHashToStatementIndex(final long hash) {
tempStatementHashes.add(hash);
}
public Optional<String> optionalCreatePrefixedURI(final Optional<String> optionalFullURI) throws DMPGraphException {
if (!optionalFullURI.isPresent()) {
return Optional.empty();
}
return Optional.of(createPrefixedURI(optionalFullURI.get()));
}
@Override
public String createPrefixedURI(final String fullURI) throws DMPGraphException {
return namespaceIndex.createPrefixedURI(fullURI);
}
public void removeHashFromStatementIndex(final long hash) {
// TODO: maybe cache removals and remove them in one rush
if (statementHashes.contains(hash)) {
statementHashes.remove(hash);
}
}
public void addStatementToIndex(final Relationship rel, final long statementUUID) {
statementUUIDs.putIfAbsent(rel, GraphStatics.UUID, statementUUID);
}
public void clearMaps() {
nodeResourceMap.clear();
bnodes.clear();
tempResourcesIndex.clear();
tempResourcesWDataModelIndex.clear();
tempResourceTypesIndex.clear();
labelCache.clear();
LOG.debug("start clearing and closing mapdb indices");
tempStatementHashes.clear();
closeMapDBIndex(tempStatementHashesDB);
closeMapDBIndex(statementHashesDB);
namespaceIndex.clearMaps();
LOG.debug("finished clearing and closing mapdb indices");
}
public void beginTx() throws DMPGraphException {
BasicNeo4jProcessor.LOG.debug("beginning new tx");
// (optionally) persist namespaces that where utilised before
namespaceIndex.resetTXNamespaces();
tx.ensureRunningTx();
initIndices();
BasicNeo4jProcessor.LOG.debug("new tx is ready");
}
public void renewTx() throws DMPGraphException {
succeedTx();
beginTx();
}
public void failTx() {
BasicNeo4jProcessor.LOG.error("tx failed; closing tx");
closeMapDBIndex(tempStatementHashesDB);
closeMapDBIndex(statementHashesDB);
namespaceIndex.closeMapDBIndices();
tx.failTx();
BasicNeo4jProcessor.LOG.error("tx failed; closed tx");
}
public void succeedTx() throws DMPGraphException {
BasicNeo4jProcessor.LOG.debug("tx succeeded; closing tx");
pumpNFlushStatementIndex();
namespaceIndex.pumpNFlushNamespacePrefixIndex();
tx.succeedTx();
BasicNeo4jProcessor.LOG.debug("tx succeeded; closed tx");
}
public void ensureRunningTx() throws DMPGraphException {
tx.ensureRunningTx();
}
public Optional<Node> determineNode(final Optional<NodeType> optionalResourceNodeType, final Optional<String> optionalResourceId,
final Optional<String> optionalResourceURI, final Optional<String> optionalDataModelURI,
final Optional<Long> optionalResourceUriDataModelUriHash) {
if (!optionalResourceNodeType.isPresent()) {
return Optional.empty();
}
if (NodeType.Resource.equals(optionalResourceNodeType.get()) || NodeType.TypeResource.equals(optionalResourceNodeType.get())) {
// resource node
final Optional<Node> optionalNode;
if (!NodeType.TypeResource.equals(optionalResourceNodeType.get())) {
if (!optionalDataModelURI.isPresent()) {
optionalNode = getResourceNodeHits(optionalResourceURI.get());
} else {
optionalNode = getNodeFromResourcesWDataModelIndex(optionalResourceUriDataModelUriHash.get());
}
} else {
optionalNode = getNodeFromResourceTypesIndex(optionalResourceURI.get());
}
return optionalNode;
}
if (NodeType.Literal.equals(optionalResourceNodeType.get())) {
// literal node - should never be the case
return Optional.empty();
}
// resource must be a blank node
final Node node = bnodes.get(optionalResourceId.get());
return Optional.ofNullable(node);
}
public Optional<Long> determineResourceHash(final Node subjectNode, final Optional<NodeType> optionalSubjectNodeType,
final Optional<Long> optionalSubjectHash, final Optional<Long> optionalResourceHash) {
final long nodeId = subjectNode.getId();
final Optional<Long> finalOptionalResourceHash;
if (nodeResourceMap.containsKey(nodeId)) {
finalOptionalResourceHash = Optional.of(nodeResourceMap.get(nodeId));
} else {
finalOptionalResourceHash = determineResourceHash(optionalSubjectNodeType, optionalSubjectHash, optionalResourceHash);
if (finalOptionalResourceHash.isPresent()) {
nodeResourceMap.put(nodeId, finalOptionalResourceHash.get());
}
}
return finalOptionalResourceHash;
}
public Optional<Long> determineResourceHash(final Optional<NodeType> optionalSubjectNodeType, final Optional<Long> optionalSubjectHash,
final Optional<Long> optionalResourceHash) {
final Optional<Long> finalOptionalResourceHash;
if (optionalSubjectNodeType.isPresent()
&& (NodeType.Resource.equals(optionalSubjectNodeType.get()) || NodeType.TypeResource.equals(optionalSubjectNodeType.get()))) {
finalOptionalResourceHash = optionalSubjectHash;
} else if (optionalResourceHash.isPresent()) {
finalOptionalResourceHash = optionalResourceHash;
} else {
// shouldn't never be the case
return Optional.empty();
}
return finalOptionalResourceHash;
}
public void addLabel(final Node node, final String labelString) {
final Label label = getLabel(labelString);
boolean hit = false;
final Iterable<Label> labels = node.getLabels();
for (final Label lbl : labels) {
if (label.equals(lbl)) {
hit = true;
break;
}
}
if (!hit) {
node.addLabel(label);
addedLabels++;
}
}
public Label getLabel(final String labelString) {
if (!labelCache.containsKey(labelString)) {
labelCache.put(labelString, DynamicLabel.label(labelString));
}
return labelCache.get(labelString);
}
public boolean checkStatementExists(final long hash) throws DMPGraphException {
return !(statementHashes == null && tempStatementHashes == null) && (tempStatementHashes != null && tempStatementHashes.contains(hash)
|| statementHashes != null && statementHashes.contains(hash));
}
public Relationship prepareRelationship(final Node subjectNode, final String predicateURI, final Node objectNode, final long statementUUID,
final Optional<Map<String, Object>> optionalQualifiedAttributes, final Optional<Long> optionalIndex, final VersionHandler versionHandler) {
final RelationshipType relType = DynamicRelationshipType.withName(predicateURI);
final Relationship rel = subjectNode.createRelationshipTo(objectNode, relType);
rel.setProperty(GraphStatics.UUID_PROPERTY, statementUUID);
if (optionalQualifiedAttributes.isPresent()) {
final Map<String, Object> qualifiedAttributes = optionalQualifiedAttributes.get();
if (qualifiedAttributes.containsKey(GraphStatics.ORDER_PROPERTY)) {
rel.setProperty(GraphStatics.ORDER_PROPERTY, qualifiedAttributes.get(GraphStatics.ORDER_PROPERTY));
}
if(optionalIndex.isPresent()) {
rel.setProperty(GraphStatics.INDEX_PROPERTY, optionalIndex.get());
} else if (qualifiedAttributes.containsKey(GraphStatics.INDEX_PROPERTY)) {
rel.setProperty(GraphStatics.INDEX_PROPERTY, qualifiedAttributes.get(GraphStatics.INDEX_PROPERTY));
}
// TODO: versioning handling only implemented for data models right now
if (qualifiedAttributes.containsKey(GraphStatics.EVIDENCE_PROPERTY)) {
rel.setProperty(GraphStatics.EVIDENCE_PROPERTY, qualifiedAttributes.get(GraphStatics.EVIDENCE_PROPERTY));
}
if (qualifiedAttributes.containsKey(GraphStatics.CONFIDENCE_PROPERTY)) {
rel.setProperty(GraphStatics.CONFIDENCE_PROPERTY, qualifiedAttributes.get(GraphStatics.CONFIDENCE_PROPERTY));
}
}
return rel;
}
public long generateStatementHash(final Relationship rel) throws DMPGraphException {
final Node subjectNode = rel.getStartNode();
final Node objectNode = rel.getEndNode();
final String predicateName = rel.getType().name();
final NodeType subjectNodeType = GraphUtils.determineNodeType(subjectNode);
final NodeType objectNodeType = GraphUtils.determineNodeType(objectNode);
return generateStatementHash(subjectNode, predicateName, objectNode, subjectNodeType, objectNodeType);
}
public long generateStatementHash(final Node subjectNode, final String predicateName, final Node objectNode, final NodeType subjectNodeType,
final NodeType objectNodeType) throws DMPGraphException {
final Optional<NodeType> optionalSubjectNodeType = Optional.ofNullable(subjectNodeType);
final Optional<NodeType> optionalObjectNodeType = Optional.ofNullable(objectNodeType);
final Optional<String> optionalSubjectIdentifier = getIdentifier(subjectNode, optionalSubjectNodeType);
final Optional<String> optionalObjectIdentifier = getIdentifier(objectNode, optionalObjectNodeType);
return generateStatementHash(predicateName, optionalSubjectNodeType, optionalObjectNodeType, optionalSubjectIdentifier,
optionalObjectIdentifier);
}
public long generateStatementHash(final Node subjectNode, final Statement statement, final Optional<String> optionalPrefixedPredicateURI)
throws DMPGraphException {
final Optional<NodeType> optionalSubjectNodeType = statement.getOptionalSubjectNodeType();
final Optional<NodeType> optionalObjectNodeType = statement.getOptionalObjectNodeType();
final Optional<String> optionalSubjectIdentifier = getIdentifier(subjectNode, optionalSubjectNodeType);
final Optional<String> optionalObjectIdentifier = statement.getOptionalObjectValue();
final String predicateName = optionalPrefixedPredicateURI.get();
return generateStatementHash(predicateName, optionalSubjectNodeType, optionalObjectNodeType,
optionalSubjectIdentifier,
optionalObjectIdentifier);
}
public long generateStatementHash(final String predicateName, final Optional<NodeType> optionalSubjectNodeType,
final Optional<NodeType> optionalObjectNodeType, final Optional<String> optionalSubjectIdentifier,
final Optional<String> optionalObjectIdentifier) throws DMPGraphException {
if (!optionalSubjectNodeType.isPresent() || !optionalObjectNodeType.isPresent() || !optionalSubjectIdentifier.isPresent()
|| !optionalObjectIdentifier.isPresent()) {
final String message = "cannot generate statement hash, because the subject node type or object node type or subject identifier or object identifier is not present";
BasicNeo4jProcessor.LOG.error(message);
throw new DMPGraphException(message);
}
final String simpleHashString = optionalSubjectNodeType.get().toString() + ":" + optionalSubjectIdentifier.get() + " " + predicateName + " "
+ optionalObjectNodeType.get().toString() + ":" + optionalObjectIdentifier.get();
final String hashString = putSaltToStatementHash(simpleHashString);
return HashUtils.generateHash(hashString);
}
public Optional<String> getIdentifier(final Node node, final Optional<NodeType> optionalNodeType) {
if (!optionalNodeType.isPresent()) {
return Optional.empty();
}
final String identifier;
switch (optionalNodeType.get()) {
case Resource:
case TypeResource:
final String uri = (String) node.getProperty(GraphStatics.URI_PROPERTY, null);
final String dataModel = (String) node.getProperty(GraphStatics.DATA_MODEL_PROPERTY, null);
if (dataModel == null) {
identifier = uri;
} else {
identifier = uri + dataModel;
}
break;
case BNode:
case TypeBNode:
identifier = "" + node.getId();
break;
case Literal:
identifier = (String) node.getProperty(GraphStatics.VALUE_PROPERTY, null);
break;
default:
identifier = null;
break;
}
return Optional.ofNullable(identifier);
}
public abstract void addObjectToResourceWDataModelIndex(final Node node, final String URI, final Optional<String> optionalDataModelURI);
public abstract void handleObjectDataModel(Node node, Optional<String> optionalDataModelURI);
public abstract void handleSubjectDataModel(final Node node, String URI, final Optional<String> optionalDataModelURI);
public abstract Optional<Node> getResourceNodeHits(final String resourceURI);
public abstract long generateResourceHash(final String resourceURI, final Optional<String> dataModelURI);
public Optional<Node> getNodeFromResourcesIndex(final String key) {
return getNodeFromSchemaIndex(key, tempResourcesIndex, GraphProcessingStatics.RESOURCE_LABEL, GraphStatics.URI_PROPERTY);
}
public Optional<Node> getNodeFromResourceTypesIndex(final String key) {
return getNodeFromSchemaIndex(key, tempResourceTypesIndex, GraphProcessingStatics.RESOURCE_TYPE_LABEL, GraphStatics.URI_PROPERTY);
}
public Optional<Node> getNodeFromResourcesWDataModelIndex(final long resourceUriAndDataModelUriHash) {
return getNodeFromLongSchemaIndex(resourceUriAndDataModelUriHash, tempResourcesWDataModelIndex, GraphProcessingStatics.RESOURCE_LABEL,
GraphStatics.HASH);
}
public Optional<Relationship> getRelationshipFromStatementIndex(final Long uuid) {
if (statementUUIDs == null) {
return Optional.empty();
}
final IndexHits<Relationship> hits = statementUUIDs.get(GraphStatics.UUID, uuid);
if (hits != null && hits.hasNext()) {
final Relationship rel = hits.next();
hits.close();
return Optional.of(rel);
}
if (hits != null) {
hits.close();
}
return Optional.empty();
}
public void addNodeToResourcesIndex(final String value, final Node node) {
addNodeToSchemaIndex(value, node, tempResourcesIndex);
}
public void addNodeToResourcesWDataModelIndex(final String resourceUri, final long resourceUriDataModelUriHash, final Node node) {
addNodeToLongSchemaIndex(resourceUriDataModelUriHash, node, tempResourcesWDataModelIndex);
addNodeToResourcesIndex(resourceUri, node);
}
public void addNodeToResourceTypesIndex(final String key, final Node node) {
addNodeToSchemaIndex(key, node, tempResourceTypesIndex);
addNodeToResourcesIndex(key, node);
}
protected abstract String putSaltToStatementHash(final String hash);
protected Optional<Node> getNodeFromSchemaIndex(final String key, final Map<String, Node> tempIndex, final Label nodeLabel,
final String nodeProperty) {
if (tempIndex.containsKey(key)) {
return Optional.of(tempIndex.get(key));
}
final Optional<Node> optionalNode = Optional.ofNullable(database.findNode(nodeLabel, nodeProperty, key));
if (optionalNode.isPresent()) {
tempIndex.put(key, optionalNode.get());
}
return optionalNode;
}
protected Optional<Node> getNodeFromLongSchemaIndex(final long key, final Map<Long, Node> tempIndex, final Label nodeLabel,
final String nodeProperty) {
if (tempIndex.containsKey(key)) {
return Optional.of(tempIndex.get(key));
}
final Optional<Node> optionalNode = Optional.ofNullable(database.findNode(nodeLabel, nodeProperty, key));
if (optionalNode.isPresent()) {
tempIndex.put(key, optionalNode.get());
}
return optionalNode;
}
protected Tuple<Set<Long>, DB> getOrCreateLongIndex(final String name) throws IOException {
final String storeDir = GraphDatabaseUtils.determineMapDBIndexStoreDir(database);
return MapDBUtils.createOrGetPersistentLongIndexTreeSetGlobalTransactional(storeDir + File.separator + name, name);
}
protected Tuple<Map<String, String>, DB> getOrCreateStringStringIndex(final String name) throws IOException {
final String storeDir = GraphDatabaseUtils.determineMapDBIndexStoreDir(database);
return MapDBUtils.createOrGetPersistentStringStringIndexTreeMapGlobalTransactional(storeDir + File.separator + name, name);
}
private void addNodeToSchemaIndex(final String key, final Node node, final Map<String, Node> tempIndex) {
tempIndex.put(key, node);
}
private void addNodeToLongSchemaIndex(final long key, final Node node, final Map<Long, Node> tempIndex) {
tempIndex.put(key, node);
}
private void pumpNFlushStatementIndex() {
LOG.debug("start pump'n'flushing statement index; size = '{}'", tempStatementHashes.size());
for (final Long hash : tempStatementHashes) {
statementHashes.add(hash);
}
LOG.debug("finished pumping statement index");
tempStatementHashesDB.commit();
statementHashesDB.commit();
LOG.debug("finished flushing statement index");
}
private void closeMapDBIndex(final DB mapDBIndex) {
if (mapDBIndex != null && !mapDBIndex.isClosed()) {
try {
mapDBIndex.close();
} catch (final RuntimeException e) {
LOG.error("could not close mapdb index properly.", e);
}
}
}
}