/**
* This file is part of d:swarm graph extension.
*
* d:swarm graph extension is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* d:swarm graph extension is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with d:swarm graph extension. If not, see <http://www.gnu.org/licenses/>.
*/
package org.dswarm.graph.xml.read;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import com.google.common.base.Charsets;
import com.google.common.base.Optional;
import com.google.common.collect.Iterators;
import org.apache.jena.vocabulary.RDF;
import org.codehaus.stax2.XMLOutputFactory2;
import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.DynamicLabel;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.PropertyContainer;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.ResourceIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.dswarm.common.DMPStatics;
import org.dswarm.common.model.Attribute;
import org.dswarm.common.model.AttributePath;
import org.dswarm.common.types.Tuple;
import org.dswarm.common.web.URI;
import org.dswarm.common.xml.utils.XMLStreamWriterUtils;
import org.dswarm.graph.DMPGraphException;
import org.dswarm.graph.gdm.read.PropertyGraphGDMReaderHelper;
import org.dswarm.graph.index.NamespaceIndex;
import org.dswarm.graph.json.LiteralNode;
import org.dswarm.graph.json.NodeType;
import org.dswarm.graph.json.Predicate;
import org.dswarm.graph.model.GraphStatics;
import org.dswarm.graph.tx.TransactionHandler;
import org.dswarm.graph.versioning.Range;
import org.dswarm.graph.versioning.VersioningStatics;
import org.dswarm.graph.versioning.utils.GraphVersionUtils;
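/**
 * Reads the records of one record class of a data model from the property graph and writes them as an XML document to
 * an output stream. Optionally the records are wrapped into the elements of a root attribute path; without a root
 * attribute path, a default root element (record tag + "s") is written when there is more than one record.
 *
 * @author tgaengler
 */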
public class PropertyGraphXMLReader implements XMLReader {
/**
 * Orders relationships ascending by the value of their {@link GraphStatics#INDEX_PROPERTY}.
 * <p>
 * Relationships that carry no index value are ordered behind all indexed relationships and are treated as equal among
 * each other (so a stable sort keeps their original relative order). Previously a missing index property made the
 * property lookup fail and thereby aborted the whole sort.
 */
private static final Comparator<Relationship> BY_INDEX_PROPERTY = new Comparator<Relationship>() {
	@Override
	public int compare(final Relationship o1, final Relationship o2) {
		// lookup with a default value, so that a missing index property yields null instead of an exception
		final Long o1Index = (Long) o1.getProperty(GraphStatics.INDEX_PROPERTY, null);
		final Long o2Index = (Long) o2.getProperty(GraphStatics.INDEX_PROPERTY, null);
		if (o1Index == null) {
			// both unindexed => equal; only o1 unindexed => o1 goes last
			return o2Index == null ? 0 : 1;
		}
		if (o2Index == null) {
			// only o2 unindexed => o2 goes last
			return -1;
		}
		return o1Index.compareTo(o2Index);
	}
};
private static final Logger LOG = LoggerFactory.getLogger(PropertyGraphXMLReader.class);
/**
 * XML version that is passed to the stream writer when the document is started.
 *
 * TODO: shall we produce XML 1.0 or XML 1.1?
 */
private static final String XML_VERSION = "1.0";
/**
 * Shared factory for the XML stream writers that are created in {@link #read(OutputStream)}.
 */
private static final XMLOutputFactory2 xmlOutputFactory;
static {
// note: this sets a JVM-wide system property; it needs to be set before the factory lookup in the next line
System.setProperty("javax.xml.stream.XMLOutputFactory", "com.fasterxml.aalto.stax.OutputFactoryImpl");
xmlOutputFactory = (XMLOutputFactory2) XMLOutputFactory.newFactory();
xmlOutputFactory.configureForSpeed();
}
// URI of the data model whose records are read
private final String dataModelUri;
// record class URI as string; its prefixed form is used as label to find the record nodes
private final String recordClassURIString;
// record class URI; its namespace is used for URIs that have no namespace of their own
private final URI recordClassURI;
// URI of the XML element that is written for every record (falls back to the record class URI)
private final URI recordTagURI;
// if present, the attributes of this path are written as enclosing elements around all records
private final Optional<AttributePath> optionalRootAttributePath;
// cache of predicate tuples by predicate URI string (filled lazily by #getPredicate)
private final Map<String, Tuple<Predicate, URI>> predicates = new HashMap<>();
// keyed by namespace; handed to XMLStreamWriterUtils for the prefix handling (presumably namespace => prefix)
private final Map<String, String> namespacesPrefixesMap = new HashMap<>();
// handed to XMLStreamWriterUtils when elements or attributes are written (content is maintained there)
private final Map<String, String> nameMap = new HashMap<>();
private final GraphDatabaseService database;
private final NamespaceIndex namespaceIndex;
// prefixed form of the data model URI; compared against the data model property of nodes and relationships
private final String prefixedDataModel;
// number of records that were written by #read
private long recordCount = 0;
// version of the data model that is read; -1 when all versions are requested
private final Integer version;
// true, if the version check of the relationships is skipped
private final boolean allVersions;
// true, if the original data type equals DMPStatics.XML_DATA_TYPE; selects the relationship handler in #read
private final boolean originalDataTypeIsXML;
// state flag that is handed to XMLStreamWriterUtils#writeXMLElementTag; set after an element was opened and reset
// after an element was closed
private boolean isElementOpen = false;
private final TransactionHandler tx;
/**
 * Creates a new XML reader for the records of the given record class in the given data model.
 * <p>
 * The version that is read is determined as follows: all versions, if {@code optionalAllVersions} is present and
 * {@code true}; otherwise the version given by {@code optionalVersionArg}, if present; otherwise the latest version of
 * the data model, which is looked up in the database (within a running transaction).
 *
 * @param optionalRootAttributePathArg optional attribute path whose attributes are written as enclosing elements
 * @param optionalRecordTagArg optional URI of the element that is written per record; the record class URI is the
 *            fall back
 * @param recordClassUriArg the URI of the record class
 * @param dataModelUriArg the URI of the data model
 * @param optionalVersionArg optional version of the data model that should be read
 * @param optionalAllVersions optional flag; {@code true} requests all versions
 * @param optionalOriginalDataType optional original data type of the data model
 * @param databaseArg the graph database
 * @param txArg the transaction handler
 * @param namespaceIndexArg the namespace index that is used to create prefixed and full URIs
 * @throws DMPGraphException if the latest version of the data model couldn't be retrieved
 */
public PropertyGraphXMLReader(final Optional<AttributePath> optionalRootAttributePathArg, final Optional<String> optionalRecordTagArg,
		final String recordClassUriArg, final String dataModelUriArg, final Optional<Integer> optionalVersionArg, final Optional<Boolean> optionalAllVersions, final Optional<String> optionalOriginalDataType,
		final GraphDatabaseService databaseArg, final TransactionHandler txArg, final NamespaceIndex namespaceIndexArg) throws DMPGraphException {
	optionalRootAttributePath = optionalRootAttributePathArg;
	recordClassURIString = recordClassUriArg;
	recordClassURI = new URI(recordClassURIString);
	if (optionalRecordTagArg.isPresent()) {
		recordTagURI = new URI(optionalRecordTagArg.get());
	} else {
		// record class URI as fall back
		recordTagURI = new URI(recordClassUriArg);
	}
	dataModelUri = dataModelUriArg;
	database = databaseArg;
	tx = txArg;
	namespaceIndex = namespaceIndexArg;
	// the flag itself needs to be evaluated as well: a present, but false flag must not select all versions
	if (optionalAllVersions.isPresent() && optionalAllVersions.get()) {
		version = -1;
		allVersions = true;
	} else if (optionalVersionArg.isPresent()) {
		version = optionalVersionArg.get();
		allVersions = false;
	} else {
		allVersions = false;
		// no version requested => read the latest version of the data model
		final boolean createdNewTx = tx.ensureRunningTx();
		PropertyGraphXMLReader.LOG.debug("start read XML TX (createdNewTx = '{}')", createdNewTx);
		try {
			version = GraphVersionUtils.getLatestVersion(dataModelUri, database);
		} catch (final Exception e) {
			final String message = "couldn't retrieve latest version successfully";
			PropertyGraphXMLReader.LOG.error(message, e);
			PropertyGraphXMLReader.LOG.debug("couldn't finish read XML TX successfully");
			tx.failTx();
			throw new DMPGraphException(message);
		}
	}
	originalDataTypeIsXML = optionalOriginalDataType.isPresent() && DMPStatics.XML_DATA_TYPE.equals(optionalOriginalDataType.get());
	prefixedDataModel = namespaceIndex.createPrefixedURI(dataModelUri);
}
/**
 * Writes all records of the record class in the data model as one XML document to the given output stream.
 * <p>
 * The records are enclosed by the elements of the root attribute path, if one is present; otherwise by a default root
 * element (record tag + "s"), if there is more than one record. Record nodes without a resource URI are skipped.
 *
 * @param outputStream the stream the XML document is written to
 * @return the XML stream writer that was used to write the document; absent, if there are no record nodes or if the
 *         export failed (a failure is logged and the transaction is marked as failed)
 * @throws DMPGraphException if the transaction couldn't be acquired
 * @throws XMLStreamException part of the signature; XML stream errors that occur while writing are caught and logged
 *             and lead to an absent result
 */
@Override
public Optional<XMLStreamWriter> read(final OutputStream outputStream) throws DMPGraphException, XMLStreamException {
	try {
		final boolean newTxCreated = tx.ensureRunningTx();
		LOG.debug("start read XML TX (createdNewTx = '{}')", newTxCreated);
	} catch (final Exception e) {
		final String message = "couldn't acquire tx successfully";
		LOG.error(message, e);
		LOG.debug("couldn't finish read XML TX successfully");
		throw new DMPGraphException(message);
	}
	ResourceIterator<Node> records = null;
	try {
		final Label recordClassLabel = DynamicLabel.label(namespaceIndex.createPrefixedURI(recordClassURIString));
		// TODO: refactor this to #findNodes + something else, then counting over the iterator
		records = database.findNodes(recordClassLabel, GraphStatics.DATA_MODEL_PROPERTY, prefixedDataModel);
		if (records == null || !records.hasNext()) {
			// nothing to export
			if (records != null) {
				records.close();
			}
			tx.succeedTx();
			LOG.debug("there are no root nodes for '{}' in data model '{}'finished read XML TX successfully", recordClassLabel, dataModelUri);
			return Optional.absent();
		}
		// peek at the first record to find out whether there is more than one; afterwards re-attach it in front of
		// the remaining records
		final Node firstRecord = records.next();
		final Iterator<Node> allRecords = Iterators.concat(Iterators.forArray(firstRecord), records);
		final boolean moreThanOneRecord = records.hasNext();
		final XMLStreamWriter writer = xmlOutputFactory.createXMLStreamWriter(outputStream);
		writer.writeStartDocument(Charsets.UTF_8.toString(), XML_VERSION);
		final boolean hasRootAttributePath = optionalRootAttributePath.isPresent();
		boolean defaultNamespaceWritten = false;
		if (hasRootAttributePath) {
			// open one element per attribute of the root attribute path
			for (final Attribute rootAttribute : optionalRootAttributePath.get().getAttributes()) {
				final URI rootElementURI = new URI(rootAttribute.getUri());
				if (!defaultNamespaceWritten && rootElementURI.hasNamespaceURI()) {
					// the first root element with a namespace determines the default namespace
					writer.setDefaultNamespace(rootElementURI.getNamespaceURI());
					defaultNamespaceWritten = true;
				}
				XMLStreamWriterUtils.writeXMLElementTag(writer, rootElementURI, namespacesPrefixesMap, nameMap, isElementOpen);
				isElementOpen = true;
			}
		} else if (moreThanOneRecord) {
			// several records without a root attribute path => enclose them by a default root element
			determineAndWriteXMLElementAndNamespace(new URI(recordTagURI + "s"), writer);
		}
		if (!defaultNamespaceWritten && recordTagURI.hasNamespaceURI()) {
			setDefaultNamespace(writer);
		}
		final XMLRelationshipHandler relationshipHandler = originalDataTypeIsXML
				? new CBDRelationshipXMLDataModelHandler(writer)
				: new CBDRelationshipHandler(writer);
		// the instance is not needed here: the node handler registers itself at the relationship handler on creation
		new CBDNodeHandler(relationshipHandler);
		final XMLNodeHandler startNodeHandler = new CBDStartNodeHandler(relationshipHandler);
		while (allRecords.hasNext()) {
			final Node recordNode = allRecords.next();
			final String recordUri = (String) recordNode.getProperty(GraphStatics.URI_PROPERTY, null);
			if (recordUri != null) {
				// record element = open tag + content + close tag
				determineAndWriteXMLElementAndNamespace(recordTagURI, writer);
				startNodeHandler.handleNode(recordNode);
				writer.writeEndElement();
				isElementOpen = false;
				recordCount++;
			} else {
				LOG.debug("there is no resource URI at record node '{}'", recordNode.getId());
			}
		}
		records.close();
		tx.succeedTx();
		LOG.debug("finished read XML TX successfully");
		if (hasRootAttributePath) {
			// close one element per attribute of the root attribute path
			for (int openRootElements = optionalRootAttributePath.get().getAttributes().size(); openRootElements > 0; openRootElements--) {
				writer.writeEndElement();
			}
		} else if (moreThanOneRecord) {
			// close the default root element
			writer.writeEndElement();
		}
		writer.writeEndDocument();
		return Optional.of(writer);
	} catch (final Exception e) {
		LOG.error("couldn't finished read XML TX successfully", e);
		if (records != null) {
			records.close();
		}
		tx.failTx();
		return Optional.absent();
	}
}
/**
 * Sets the default namespace of the given writer to the base URI of the record tag URI or, if the record tag URI has
 * no namespace, to the base URI of the record class URI.
 *
 * @param writer the XML stream writer whose default namespace is set
 * @throws XMLStreamException if the writer rejects the namespace
 */
private void setDefaultNamespace(final XMLStreamWriter writer) throws XMLStreamException {
	// TODO: shall we cut the last character?
	final URI namespaceSource = recordTagURI.hasNamespaceURI() ? recordTagURI : recordClassURI;
	writer.setDefaultNamespace(XMLStreamWriterUtils.determineBaseURI(namespaceSource));
}
/**
 * Opens an XML element for the given URI and writes a namespace declaration for its namespace, if this namespace was
 * not contained in the namespace map before. A URI without a namespace is put into the namespace of the record class
 * URI.
 *
 * @param uri the URI of the element that should be opened
 * @param writer the XML stream writer
 * @return the URI that was finally used for the element (namespace + local name)
 * @throws XMLStreamException if writing the element or the namespace fails
 */
private URI determineAndWriteXMLElementAndNamespace(final URI uri, final XMLStreamWriter writer) throws XMLStreamException {
	// the URI itself or - as fall back - the record class URI delivers the namespace
	final URI namespaceSource = uri.hasNamespaceURI() ? uri : recordClassURI;
	final String namespace = XMLStreamWriterUtils.determineBaseURI(namespaceSource);
	// this check has to happen before the prefix is requested (presumably the prefix lookup registers the namespace)
	final boolean namespaceKnown = namespacesPrefixesMap.containsKey(namespace);
	final String prefix = XMLStreamWriterUtils.getPrefix(namespace, namespacesPrefixesMap);
	final URI finalURI = new URI(namespaceSource.getNamespaceURI() + uri.getLocalName());
	// open the XML tag
	XMLStreamWriterUtils.writeXMLElementTag(writer, finalURI, namespacesPrefixesMap, nameMap, isElementOpen);
	isElementOpen = true;
	// TODO: shall we write the default namespace here as well (and shall we cut its last character)?
	if (!namespaceKnown) {
		writer.writeNamespace(prefix, namespace);
	}
	return finalURI;
}
/**
 * @return the number of records that were written by this reader so far
 */
@Override
public long recordCount() {
	return this.recordCount;
}
/**
 * Checks whether the given relationship is valid for the given version.
 *
 * @param version the version to check
 * @param relationship the relationship (or another property container) that may carry the version range properties
 * @return {@code true}, if the relationship has no valid-from value, or no valid-to value, or if the version is
 *         contained in the range of both values
 */
private static boolean hasValidVersion(final Integer version, final PropertyContainer relationship) {
	final Integer from = (Integer) relationship.getProperty(VersioningStatics.VALID_FROM_PROPERTY, null);
	if (from == null) {
		// TODO: require non null later, when every stmt is versioned
		return true;
	}
	final Integer to = (Integer) relationship.getProperty(VersioningStatics.VALID_TO_PROPERTY, null);
	if (to == null) {
		return true;
	}
	return Range.range(from, to).contains(version);
}
/**
 * Collects the outgoing relationships of the given node and sorts them with {@code BY_INDEX_PROPERTY}.
 *
 * @param node the node whose outgoing relationships are requested
 * @return the sorted outgoing relationships; an empty list, if the node delivers no relationships
 */
private static Iterable<Relationship> getSortedOutgoings(final Node node) {
	final Iterable<Relationship> outgoings = node.getRelationships(Direction.OUTGOING);
	if (outgoings == null) {
		return Collections.emptyList();
	}
	final List<Relationship> ordered = new ArrayList<>();
	for (final Relationship outgoing : outgoings) {
		ordered.add(outgoing);
	}
	// TODO: decide how relationships without an index value (currently the case for import via RDF) should be ordered
	Collections.sort(ordered, BY_INDEX_PROPERTY);
	return ordered;
}
/**
 * Node handler for the nodes below a record node: continues the traversal only at nodes without a URI property.
 */
private class CBDNodeHandler implements XMLNodeHandler {
	private final XMLRelationshipHandler relationshipHandler;

	/**
	 * Creates the node handler and registers it at the given relationship handler.
	 *
	 * @param relationshipHandlerArg the relationship handler; needs to be a {@link CBDRelationshipHandler}
	 */
	protected CBDNodeHandler(final XMLRelationshipHandler relationshipHandlerArg) {
		relationshipHandler = relationshipHandlerArg;
		final CBDRelationshipHandler cbdRelationshipHandler = (CBDRelationshipHandler) relationshipHandlerArg;
		cbdRelationshipHandler.setNodeHandler(this);
	}

	@Override
	public void handleNode(final Node node) throws DMPGraphException, XMLStreamException {
		// TODO: find a better way to determine the end of a resource description, e.g., add a property "resource" to
		// each node that holds the uri of the resource (record)
		// => maybe we should find an appropriated cypher query as replacement for this processing
		if (node.hasProperty(GraphStatics.URI_PROPERTY)) {
			// a node with a URI is not traversed any further by this handler
			return;
		}
		for (final Relationship outgoing : getSortedOutgoings(node)) {
			if (!allVersions && !hasValidVersion(version, outgoing)) {
				// relationship is not part of the requested version
				continue;
			}
			relationshipHandler.handleRelationship(outgoing);
		}
	}
}
/**
 * Node handler for the record nodes: starts the traversal only at nodes that have a URI property.
 */
private class CBDStartNodeHandler implements XMLNodeHandler {
	private final XMLRelationshipHandler relationshipHandler;

	protected CBDStartNodeHandler(final XMLRelationshipHandler relationshipHandlerArg) {
		relationshipHandler = relationshipHandlerArg;
	}

	@Override
	public void handleNode(final Node node) throws DMPGraphException, XMLStreamException {
		// TODO: find a better way to determine the end of a resource description, e.g., add a property "resource" to
		// each node that holds the uri of the resource (record) (this is the case for model that came as GDM JSON)
		if (!node.hasProperty(GraphStatics.URI_PROPERTY)) {
			// a node without a URI is no start node
			return;
		}
		for (final Relationship outgoing : getSortedOutgoings(node)) {
			if (!allVersions && !hasValidVersion(version, outgoing)) {
				// relationship is not part of the requested version
				continue;
			}
			relationshipHandler.handleRelationship(outgoing);
		}
	}
}
/**
 * Default handling: don't export RDF types and write literal objects as XML elements.
 */
protected class CBDRelationshipHandler implements XMLRelationshipHandler {
	protected final XMLStreamWriter writer;
	private final PropertyGraphGDMReaderHelper propertyGraphGDMReaderHelper = new PropertyGraphGDMReaderHelper(namespaceIndex);
	private XMLNodeHandler nodeHandler;

	protected CBDRelationshipHandler(final XMLStreamWriter writerArg) {
		writer = writerArg;
	}

	/**
	 * Sets the node handler that continues the traversal at bnode objects.
	 *
	 * @param nodeHandlerArg the node handler
	 */
	protected void setNodeHandler(final XMLNodeHandler nodeHandlerArg) {
		nodeHandler = nodeHandlerArg;
	}

	/**
	 * Writes the statement of the given relationship, if the relationship belongs to the data model of this reader.
	 * When the object is a bnode, an element for the predicate is opened, the traversal is continued at the object
	 * node and the element is closed afterwards.
	 *
	 * @param rel the relationship to handle
	 */
	@Override
	public void handleRelationship(final Relationship rel) throws DMPGraphException, XMLStreamException {
		// note: we can also optionally check for the "resource property at the relationship (this property will only
		// be written right now for model that came as GDM JSON)
		final boolean belongsToDataModel = rel.getProperty(GraphStatics.DATA_MODEL_PROPERTY).equals(prefixedDataModel);
		if (!belongsToDataModel) {
			return;
		}
		// TODO: the subject is not evaluated yet; we would need to compare the subject node with the previous one, to
		// write the content (key(predicate)/value(object)) into the current element or another of this tag
		// TODO: how to determine, when we should close a tag (or parent tag etc.) => we need to keep a stack, of open
		// elements
		// predicate => XML element or XML attribute
		final String predicateString = namespaceIndex.createFullURI(rel.getType().name());
		final URI predicateURI = getPredicate(predicateString).v2();
		// object => XML Element value or XML attribute value or further recursion
		final Node objectNode = rel.getEndNode();
		final org.dswarm.graph.json.Node objectGDMNode = propertyGraphGDMReaderHelper.readObject(objectNode);
		writeKeyValue(predicateURI, objectGDMNode);
		// note: we can only iterate deeper into one direction, i.e., we need to cut the stream, when the object is
		// another resource => i.e. we iterate only when object are bnodes
		// TODO: what should we do with objects that are resources?
		if (objectGDMNode.getType() != NodeType.BNode) {
			return;
		}
		// open tag, continue the traversal with the object node, close tag
		XMLStreamWriterUtils.writeXMLElementTag(writer, predicateURI, namespacesPrefixesMap, nameMap, isElementOpen);
		isElementOpen = true;
		nodeHandler.handleNode(objectNode);
		writer.writeEndElement();
		isElementOpen = false;
	}

	/**
	 * Default handling: writes a literal object as XML element (with the literal value as CDATA content), unless the
	 * predicate is rdf:type. All other statements are not written.
	 *
	 * @param predicateURI the URI of the predicate
	 * @param objectGDMNode the object of the statement
	 * @throws XMLStreamException if writing the element fails
	 */
	protected void writeKeyValue(final URI predicateURI, final org.dswarm.graph.json.Node objectGDMNode) throws XMLStreamException {
		final boolean isRdfType = RDF.type.getURI().equals(predicateURI.toString());
		if (isRdfType || NodeType.Literal != objectGDMNode.getType()) {
			// TODO: ??? (RDF types and non-literal objects are not handled here)
			return;
		}
		// open tag, write value, close tag
		XMLStreamWriterUtils.writeXMLElementTag(writer, predicateURI, namespacesPrefixesMap, nameMap, isElementOpen);
		writer.writeCData(((LiteralNode) objectGDMNode).getValue());
		writer.writeEndElement();
		isElementOpen = false;
	}
}
/**
 * Treat non-rdf:value/non-rdf:type statements with literal objects as XML attributes and rdf:value statements with literal
 * objects as XML elements.
 */
private class CBDRelationshipXMLDataModelHandler extends CBDRelationshipHandler {
	protected CBDRelationshipXMLDataModelHandler(final XMLStreamWriter writerArg) {
		super(writerArg);
	}

	/**
	 * Writes a literal object as CDATA content of the current element, if the predicate is rdf:value, or as XML
	 * attribute, if the predicate is neither rdf:value nor rdf:type. All other statements are not written.
	 *
	 * @param predicateURI the URI of the predicate
	 * @param objectGDMNode the object of the statement
	 * @throws XMLStreamException if writing the attribute or the content fails
	 */
	@Override
	protected void writeKeyValue(final URI predicateURI, final org.dswarm.graph.json.Node objectGDMNode) throws XMLStreamException {
		final String predicate = predicateURI.toString();
		if (RDF.type.getURI().equals(predicate)) {
			// RDF types are never written
			return;
		}
		if (NodeType.Literal != objectGDMNode.getType()) {
			// ??? - log these occurrences?
			// TODO: what should we do with objects that are resources?
			return;
		}
		final String value = ((LiteralNode) objectGDMNode).getValue();
		if (RDF.value.getURI().equals(predicate)) {
			// rdf:value => content of the current XML element
			writer.writeCData(value);
		} else {
			// predicate is an XML Attribute => write XML Attribute to this XML Element
			XMLStreamWriterUtils.writeXMLAttribute(writer, predicateURI, value, namespacesPrefixesMap, nameMap);
		}
	}
}
/**
 * Delivers the predicate tuple (predicate + URI) for the given predicate URI string. The tuple is created on first
 * request and cached for subsequent requests.
 *
 * @param predicateString the (full) URI of the predicate as string
 * @return the predicate tuple for the given predicate URI string
 */
private Tuple<Predicate, URI> getPredicate(final String predicateString) {
	final Tuple<Predicate, URI> cachedTuple = predicates.get(predicateString);
	if (cachedTuple != null) {
		return cachedTuple;
	}
	final Tuple<Predicate, URI> newTuple = Tuple.tuple(new Predicate(predicateString), new URI(predicateString));
	predicates.put(predicateString, newTuple);
	return newTuple;
}
}