/**
* This file is part of d:swarm graph extension.
*
* d:swarm graph extension is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* d:swarm graph extension is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with d:swarm graph extension. If not, see <http://www.gnu.org/licenses/>.
*/
package org.dswarm.graph.gdm.parse;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.RelationshipType;
import org.neo4j.graphdb.Transaction;
import org.neo4j.tooling.GlobalGraphOperations;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.dswarm.graph.DMPGraphException;
import org.dswarm.graph.delta.Changeset;
import org.dswarm.graph.delta.DeltaState;
import org.dswarm.graph.delta.DeltaStatics;
import org.dswarm.graph.delta.util.GraphDBPrintUtil;
import org.dswarm.graph.delta.util.GraphDBUtil;
import org.dswarm.graph.json.Node;
import org.dswarm.graph.json.ResourceNode;
import org.dswarm.graph.json.Statement;
import org.dswarm.graph.model.GraphStatics;
/**
* @author tgaengler
*/
public class GDMChangesetParser implements GDMUpdateParser {
private static final Logger LOG = LoggerFactory.getLogger(GDMChangesetParser.class);
private GDMUpdateHandler gdmHandler;
private final Changeset changeset;
private final long existingResourceHash;
private final GraphDatabaseService existingResourceDB;
private final GraphDatabaseService newResourceDB;
public GDMChangesetParser(final Changeset changesetArg, final long existingResourceHashArg, final GraphDatabaseService existingResourceDBArg,
final GraphDatabaseService newResourceDBArg) {
changeset = changesetArg;
existingResourceHash = existingResourceHashArg;
existingResourceDB = existingResourceDBArg;
newResourceDB = newResourceDBArg;
}
@Override
public void setGDMHandler(final GDMUpdateHandler handler) {
gdmHandler = handler;
}
@Override
public void parse() throws DMPGraphException {
if (changeset == null || existingResourceDB == null || newResourceDB == null) {
LOG.debug("there is no change set or resource working sets");
gdmHandler.getHandler().closeTransaction();
return;
}
LOG.debug("start processing changeset");
// 0. fetch latest version from data model node (+ increase this value + update resource node (maybe at the end))
// 1. compare existing resource DB statements with new resource DB statements, i.e. write/follow statements ordered by
// index property (it might be good to also have the relationship in the permanent DB that is related to this index
// available)
final Transaction newResourceDBTX = newResourceDB.beginTx();
final Transaction existingResourceDBTX = existingResourceDB.beginTx();
try {
final Iterable<Relationship> existingRelationships = GlobalGraphOperations.at(existingResourceDB).getAllRelationships();
final Iterator<Relationship> existingRelationshipsIter = existingRelationships.iterator();
final Iterable<Relationship> newRelationships = GlobalGraphOperations.at(newResourceDB).getAllRelationships();
final Iterator<Relationship> newRelationshipsIter = newRelationships.iterator();
final Set<Long> alreadyAddedStatementUUIDs = new HashSet<>();
final Set<Long> alreadyDeletedStatementUUIDs = new HashSet<>();
final Set<Long> alreadyModifiedExistingStatementUUIDs = new HashSet<>();
final Set<Long> alreadyModifiedNewStatementUUIDs = new HashSet<>();
long index = 1;
Relationship existingRelationship = existingRelationshipsIter.next();
Relationship newRelationship = null;
do {
final Long indexFromDB;
if (existingRelationship != null) {
indexFromDB = (Long) existingRelationship.getProperty(GraphStatics.INDEX_PROPERTY, null);
} else if (newRelationship != null) {
indexFromDB = (Long) newRelationship.getProperty(GraphStatics.INDEX_PROPERTY, null);
} else {
indexFromDB = null;
}
if (indexFromDB == null) {
// TODO: we should probably throw an exception instead
break;
}
if (!newRelationshipsIter.hasNext()) {
// TODO: we should probably throw an exception instead
break;
}
final DeltaState existingRelDeltaState = getDeltaState(existingRelationship);
final DeltaState finalDeltaState;
boolean increaseExistingRelationship = true;
if (existingRelDeltaState == null || DeltaState.ExactMatch.equals(existingRelDeltaState)) {
newRelationship = getNewRel(newRelationshipsIter, alreadyAddedStatementUUIDs, alreadyDeletedStatementUUIDs,
alreadyModifiedNewStatementUUIDs);
if (newRelationship == null) {
break;
}
finalDeltaState = getDeltaState(newRelationship);
} else {
finalDeltaState = existingRelDeltaState;
final boolean stmtAlreadyProcessed = checkStmt(existingRelationship, finalDeltaState, alreadyAddedStatementUUIDs,
alreadyDeletedStatementUUIDs, alreadyModifiedExistingStatementUUIDs);
if (stmtAlreadyProcessed) {
// skip already processed existing stmt
existingRelationship = increaseRelationship(existingRelationshipsIter);
continue;
}
}
switch (finalDeltaState) {
case ADDITION:
final Long newResourceStmtUUID = (Long) newRelationship.getProperty(GraphStatics.UUID_PROPERTY, null);
final Statement addedStatement = changeset.getAdditions().get(newResourceStmtUUID);
// retrieve start node via subject identifier (?) - start node must be a resource node (i.e., we could probably verify this requirement)
// retrieve the resource identifier from the existing resource and replace the subject of the to-be-added statement
if (ResourceNode.class.isInstance(addedStatement.getSubject())) {
final ResourceNode subject = (ResourceNode) addedStatement.getSubject();
final String subjectURI = subject.getUri();
final String prefixedSubjectURI = gdmHandler.getHandler().getProcessor().createPrefixedURI(subjectURI);
final Optional<String> optionalDataModelURI;
if (subject.getDataModel() != null) {
optionalDataModelURI = Optional
.ofNullable(gdmHandler.getHandler().getProcessor().createPrefixedURI(subject.getDataModel()));
} else {
optionalDataModelURI = Optional.empty();
}
final long subjectHash = gdmHandler.getHandler().getProcessor()
.generateResourceHash(prefixedSubjectURI, optionalDataModelURI);
if (existingResourceHash != subjectHash) {
// TODO: do something, e.g., replace subject of the to-be-added statement;
break;
}
}
final Long addedStmtUUID = Long.valueOf(addedStatement.getUUID());
gdmHandler.handleStatement(addedStatement, existingResourceHash, index);
alreadyAddedStatementUUIDs.add(addedStmtUUID);
// simply increase the index?
index++;
increaseExistingRelationship = false;
break;
case DELETION:
final Long existingResourceStmtUUID = (Long) existingRelationship.getProperty(GraphStatics.UUID_PROPERTY, null);
// note: we don't need to retrieve the stmt from the changeset, we just need the uuid of it
// final Statement deletedStatement = changeset.getDeletions().get(existingResourceStmtUUID);
final RelationshipType relType = existingRelationship.getType();
final String relTypeName = relType.name();
// skip rdf:type rels, since they do not exist in the permanent graph
if (!relTypeName.equals(GraphDBUtil.RDF_TYPE_REL_TYPE.name())) {
// utilise statement uuid from existing statement to deprecate it
gdmHandler.deprecateStatement(existingResourceStmtUUID);
}
alreadyDeletedStatementUUIDs.add(existingResourceStmtUUID);
break;
case MODIFICATION:
final Long modifiedNodeId = changeset.getModifications().get(existingRelationship.getEndNode().getId());
final Statement modifiedStatement = changeset.getNewModifiedStatements().get(modifiedNodeId);
final Statement finalModifiedStatement;
final Long existingModifiedStmtUUID;
if (modifiedStatement != null) {
finalModifiedStatement = modifiedStatement;
existingModifiedStmtUUID = (Long) existingRelationship.getProperty(GraphStatics.UUID_PROPERTY, null);
} else {
final Long newModifiedNodeId = newRelationship.getEndNode().getId();
finalModifiedStatement = changeset.getNewModifiedStatements().get(newModifiedNodeId);
Long existingModifiedNodeId = null;
for (final Map.Entry<Long, Long> modificationsEntry : changeset.getModifications().entrySet()) {
if (modificationsEntry.getValue().equals(newModifiedNodeId)) {
existingModifiedNodeId = modificationsEntry.getKey();
break;
}
}
if (existingModifiedNodeId == null) {
// TODO: do something, e.g., throw an exception
break;
}
final Statement existingModifiedStatement = changeset.getExistingModifiedStatements().get(existingModifiedNodeId);
existingModifiedStmtUUID = Long.valueOf(existingModifiedStatement.getUUID());
increaseExistingRelationship = false;
}
if (finalModifiedStatement == null) {
// TODO: do something, e.g., throw an exception
break;
}
// utilise statement uuid from existing statenent to deprecate it + to get modification (?)
final Node subject = gdmHandler.deprecateStatement(existingModifiedStmtUUID);
alreadyModifiedExistingStatementUUIDs.add(existingModifiedStmtUUID);
// take subject from existing resource to append the statement on the correct position
finalModifiedStatement.setSubject(subject);
gdmHandler.handleStatement(finalModifiedStatement, existingResourceHash, index);
alreadyModifiedNewStatementUUIDs.add(Long.valueOf(finalModifiedStatement.getUUID()));
index++;
break;
}
if (!finalDeltaState.equals(DeltaState.ExactMatch)) {
if (increaseExistingRelationship) {
existingRelationship = increaseRelationship(existingRelationshipsIter);
}
continue;
}
if (newRelationship == null) {
newRelationship = increaseRelationship(newRelationshipsIter);
}
final String existingRelationshipPrint = GraphDBPrintUtil.printRelationship(existingRelationship);
final String newRelationshipPrint = GraphDBPrintUtil.printRelationship(newRelationship);
if (!(existingRelationshipPrint.equals(newRelationshipPrint) && index == indexFromDB)) {
// note: we don't really know how equal/unequal the statements are at this moment, so it's better to compare them more in detail (? - once again?) - we could also hold a map of exact matched statements
// deprecate old statement and write it as new statement with a different index
final Long existingStmtUUID = (Long) existingRelationship.getProperty(GraphStatics.UUID_PROPERTY, null);
final Long newStmtOrder = (Long) newRelationship.getProperty(GraphStatics.ORDER_PROPERTY, null);
final long finalNewStmtOrder;
if (newStmtOrder != null) {
finalNewStmtOrder = newStmtOrder;
} else {
finalNewStmtOrder = (long) 1;
}
final RelationshipType relType = existingRelationship.getType();
final String relTypeName = relType.name();
// skip rdf:type rels, since they do not exist in the permanent graph
if (!relTypeName.equals(GraphDBUtil.RDF_TYPE_REL_TYPE.name())) {
gdmHandler.deprecateStatement(existingStmtUUID);
gdmHandler.handleStatement(existingStmtUUID, existingResourceHash, index, finalNewStmtOrder);
}
}
index++;
existingRelationship = increaseRelationship(existingRelationshipsIter);
} while (newRelationshipsIter.hasNext() || existingRelationshipsIter.hasNext());
// System.out.println("index = '" + (index -1) + "'");
newResourceDBTX.success();
existingResourceDBTX.success();
} catch (final Exception e) {
GDMChangesetParser.LOG.error("couldn't write changeset successfully to graph DB", e);
newResourceDBTX.failure();
existingResourceDBTX.failure();
} finally {
newResourceDBTX.close();
existingResourceDBTX.close();
}
// 1.1 if a statement was added or deleted or the printed version doesn't equal, rewrite all following statements
// afterwards and deprecated the existing ones (i.e. update their valid to value)
// for added + modified statements utilise the current version for valid from
LOG.debug("finished processing changeset");
}
private DeltaState getDeltaState(final Relationship relationship) {
if (relationship == null) {
return null;
}
final String deltaStateString = (String) relationship.getProperty(DeltaStatics.DELTA_STATE_PROPERTY, null);
return DeltaState.getByName(deltaStateString);
}
private boolean checkStmt(final Relationship rel, final DeltaState deltaState, final Set<Long> alreadyAddedStatementUUIDs,
final Set<Long> alreadyDeletedStatementUUIDs, final Set<Long> alreadyModifiedStatementUUIDs) {
if (rel == null) {
return false;
}
final Long newStmtUUID = (Long) rel.getProperty(GraphStatics.UUID_PROPERTY, null);
boolean stmtAlreadyProcessed = false;
switch (deltaState) {
case ADDITION:
if (alreadyAddedStatementUUIDs.contains(newStmtUUID)) {
stmtAlreadyProcessed = true;
}
break;
case DELETION:
if (alreadyDeletedStatementUUIDs.contains(newStmtUUID)) {
stmtAlreadyProcessed = true;
}
break;
case MODIFICATION:
if (alreadyModifiedStatementUUIDs.contains(newStmtUUID)) {
stmtAlreadyProcessed = true;
}
break;
}
return stmtAlreadyProcessed;
}
private Relationship getNewRel(final Iterator<Relationship> newRelationshipsIter, final Set<Long> alreadyAddedStatementUUIDs,
final Set<Long> alreadyDeletedStatementUUIDs, final Set<Long> alreadyModifiedNewStatementUUIDs) {
final Relationship newRelationship = increaseRelationship(newRelationshipsIter);
final DeltaState deltaState = getDeltaState(newRelationship);
final boolean stmtAlreadyProcessed = checkStmt(newRelationship, deltaState, alreadyAddedStatementUUIDs, alreadyDeletedStatementUUIDs,
alreadyModifiedNewStatementUUIDs);
if (stmtAlreadyProcessed) {
// skip new rel and fetch next one
return getNewRel(newRelationshipsIter, alreadyAddedStatementUUIDs, alreadyDeletedStatementUUIDs, alreadyModifiedNewStatementUUIDs);
}
return newRelationship;
}
private Relationship increaseRelationship(final Iterator<Relationship> relationshipIterator) {
if (relationshipIterator.hasNext()) {
return relationshipIterator.next();
}
return null;
}
}