/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.tinkerpop.gremlin.structure.io.graphml; import org.apache.tinkerpop.gremlin.structure.Direction; import org.apache.tinkerpop.gremlin.structure.Edge; import org.apache.tinkerpop.gremlin.structure.Graph; import org.apache.tinkerpop.gremlin.structure.Property; import org.apache.tinkerpop.gremlin.structure.T; import org.apache.tinkerpop.gremlin.structure.Vertex; import org.apache.tinkerpop.gremlin.structure.VertexProperty; import org.apache.tinkerpop.gremlin.structure.io.GraphReader; import org.apache.tinkerpop.gremlin.structure.io.Io; import org.apache.tinkerpop.gremlin.structure.util.Attachable; import org.apache.tinkerpop.gremlin.structure.util.ElementHelper; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import javax.xml.stream.events.XMLEvent; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; import java.util.stream.Stream; /** * GraphMLReader writes the data from a GraphML stream to a graph. Note that this format is lossy, in the sense that data * types and features of Gremlin Structure not supported by GraphML are not serialized. This format is meant for * external export of a graph to tools outside of Gremlin Structure graphs. Note that GraphML does not support * the notion of multi-properties or properties on properties. * * @author Marko A. Rodriguez (http://markorodriguez.com) * @author Alex Averbuch (alex.averbuch@gmail.com) * @author Joshua Shinavier (http://fortytwo.net) * @author Stephen Mallette (http://stephen.genoprime.com) */ public final class GraphMLReader implements GraphReader { private final XMLInputFactory inputFactory = XMLInputFactory.newInstance(); private final String edgeLabelKey; private final String vertexLabelKey; private final long batchSize; private final boolean strict; private GraphMLReader(final Builder builder) { this.edgeLabelKey = builder.edgeLabelKey; this.batchSize = builder.batchSize; this.vertexLabelKey = builder.vertexLabelKey; this.strict = builder.strict; } @Override public void readGraph(final InputStream graphInputStream, final Graph graphToWriteTo) throws IOException { final Map<Object, Vertex> cache = new HashMap<>(); final AtomicLong counter = new AtomicLong(0); final boolean supportsTx = graphToWriteTo.features().graph().supportsTransactions(); final Graph.Features.EdgeFeatures edgeFeatures = graphToWriteTo.features().edge(); final Graph.Features.VertexFeatures vertexFeatures = graphToWriteTo.features().vertex(); try { final XMLStreamReader reader = inputFactory.createXMLStreamReader(graphInputStream); final Map<String, String> keyIdMap = new HashMap<>(); final Map<String, String> keyTypesMaps = new HashMap<>(); // Buffered Vertex Data String vertexId = null; String vertexLabel = null; Map<String, Object> vertexProps = null; boolean isInVertex = false; // Buffered Edge Data String edgeId = null; String edgeLabel = null; Vertex edgeInVertex = null; Vertex edgeOutVertex = null; Map<String, Object> edgeProps = null; boolean isInEdge = false; while (reader.hasNext()) { final Integer eventType = reader.next(); if (eventType.equals(XMLEvent.START_ELEMENT)) { final String elementName = reader.getName().getLocalPart(); switch (elementName) { case GraphMLTokens.KEY: final String id = reader.getAttributeValue(null, GraphMLTokens.ID); final String attributeName = reader.getAttributeValue(null, GraphMLTokens.ATTR_NAME); final String attributeType = reader.getAttributeValue(null, GraphMLTokens.ATTR_TYPE); keyIdMap.put(id, attributeName); keyTypesMaps.put(id, attributeType); break; case GraphMLTokens.NODE: vertexId = reader.getAttributeValue(null, GraphMLTokens.ID); isInVertex = true; vertexProps = new HashMap<>(); break; case GraphMLTokens.EDGE: edgeId = reader.getAttributeValue(null, GraphMLTokens.ID); final String vertexIdOut = reader.getAttributeValue(null, GraphMLTokens.SOURCE); final String vertexIdIn = reader.getAttributeValue(null, GraphMLTokens.TARGET); // graphml allows edges and vertices to be mixed in terms of how they are positioned // in the xml therefore it is possible that an edge is created prior to its definition // as a vertex. edgeOutVertex = findOrCreate(vertexIdOut, graphToWriteTo, vertexFeatures, cache, false); edgeInVertex = findOrCreate(vertexIdIn, graphToWriteTo, vertexFeatures, cache, false); if (supportsTx && counter.incrementAndGet() % batchSize == 0) graphToWriteTo.tx().commit(); isInEdge = true; edgeProps = new HashMap<>(); break; case GraphMLTokens.DATA: final String key = reader.getAttributeValue(null, GraphMLTokens.KEY); final String dataAttributeName = keyIdMap.get(key); if (dataAttributeName != null) { final String value = reader.getElementText(); if (isInVertex) { if (key.equals(vertexLabelKey)) vertexLabel = value; else { try { vertexProps.put(dataAttributeName, typeCastValue(key, value, keyTypesMaps)); } catch (NumberFormatException nfe) { if (strict) throw nfe; } } } else if (isInEdge) { if (key.equals(edgeLabelKey)) edgeLabel = value; else { try { edgeProps.put(dataAttributeName, typeCastValue(key, value, keyTypesMaps)); } catch (NumberFormatException nfe) { if (strict) throw nfe; } } } } break; } } else if (eventType.equals(XMLEvent.END_ELEMENT)) { final String elementName = reader.getName().getLocalPart(); if (elementName.equals(GraphMLTokens.NODE)) { final String currentVertexId = vertexId; final String currentVertexLabel = Optional.ofNullable(vertexLabel).orElse(Vertex.DEFAULT_LABEL); final Object[] propsAsArray = vertexProps.entrySet().stream().flatMap(e -> Stream.of(e.getKey(), e.getValue())).toArray(); findOrCreate(currentVertexId, graphToWriteTo, vertexFeatures, cache, true, ElementHelper.upsert(propsAsArray, T.label, currentVertexLabel)); if (supportsTx && counter.incrementAndGet() % batchSize == 0) graphToWriteTo.tx().commit(); vertexId = null; vertexLabel = null; vertexProps = null; isInVertex = false; } else if (elementName.equals(GraphMLTokens.EDGE)) { final Object[] propsAsArray = edgeProps.entrySet().stream().flatMap(e -> Stream.of(e.getKey(), e.getValue())).toArray(); final Object[] propsReady = null != edgeId && edgeFeatures.willAllowId(edgeId) ? ElementHelper.upsert(propsAsArray, T.id, edgeId) : propsAsArray; edgeOutVertex.addEdge(null == edgeLabel ? Edge.DEFAULT_LABEL : edgeLabel, edgeInVertex, propsReady); if (supportsTx && counter.incrementAndGet() % batchSize == 0) graphToWriteTo.tx().commit(); edgeId = null; edgeLabel = null; edgeOutVertex = null; edgeInVertex = null; edgeProps = null; isInEdge = false; } } } if (supportsTx) graphToWriteTo.tx().commit(); } catch (XMLStreamException xse) { // rollback whatever portion failed if (supportsTx && counter.incrementAndGet() % batchSize == 0) graphToWriteTo.tx().rollback(); throw new IOException(xse); } } /** * This method is not supported for this reader. * * @throws UnsupportedOperationException when called. */ @Override public Iterator<Vertex> readVertices(final InputStream inputStream, final Function<Attachable<Vertex>, Vertex> vertexAttachMethod, final Function<Attachable<Edge>, Edge> edgeAttachMethod, final Direction attachEdgesOfThisDirection) throws IOException { throw Io.Exceptions.readerFormatIsForFullGraphSerializationOnly(this.getClass()); } /** * This method is not supported for this reader. * * @throws UnsupportedOperationException when called. */ @Override public Vertex readVertex(final InputStream inputStream, final Function<Attachable<Vertex>, Vertex> vertexAttachMethod) throws IOException { throw Io.Exceptions.readerFormatIsForFullGraphSerializationOnly(this.getClass()); } /** * This method is not supported for this reader. * * @throws UnsupportedOperationException when called. */ @Override public Vertex readVertex(final InputStream inputStream, final Function<Attachable<Vertex>, Vertex> vertexAttachMethod, final Function<Attachable<Edge>, Edge> edgeAttachMethod, final Direction attachEdgesOfThisDirection) throws IOException { throw Io.Exceptions.readerFormatIsForFullGraphSerializationOnly(this.getClass()); } /** * This method is not supported for this reader. * * @throws UnsupportedOperationException when called. */ @Override public Edge readEdge(final InputStream inputStream, final Function<Attachable<Edge>, Edge> edgeAttachMethod) throws IOException { throw Io.Exceptions.readerFormatIsForFullGraphSerializationOnly(this.getClass()); } /** * This method is not supported for this reader. * * @throws UnsupportedOperationException when called. */ @Override public VertexProperty readVertexProperty(final InputStream inputStream, final Function<Attachable<VertexProperty>, VertexProperty> vertexPropertyAttachMethod) throws IOException { throw Io.Exceptions.readerFormatIsForFullGraphSerializationOnly(this.getClass()); } /** * This method is not supported for this reader. * * @throws UnsupportedOperationException when called. */ @Override public Property readProperty(final InputStream inputStream, final Function<Attachable<Property>, Property> propertyAttachMethod) throws IOException { throw Io.Exceptions.readerFormatIsForFullGraphSerializationOnly(this.getClass()); } /** * This method is not supported for this reader. * * @throws UnsupportedOperationException when called. */ @Override public <C> C readObject(final InputStream inputStream, final Class<? extends C> clazz) throws IOException { throw Io.Exceptions.readerFormatIsForFullGraphSerializationOnly(this.getClass()); } private static Vertex findOrCreate(final Object id, final Graph graphToWriteTo, final Graph.Features.VertexFeatures features, final Map<Object, Vertex> cache, final boolean asVertex, final Object... args) { if (cache.containsKey(id)) { // if the request to findOrCreate come from a vertex then AND the vertex was already created, that means // that the vertex was created by an edge that arrived first in the stream (allowable via GraphML // specification). as the edge only carries the vertex id and not its properties, the properties // of the vertex need to be attached at this point. if (asVertex) { final Vertex v = cache.get(id); ElementHelper.attachProperties(v, args); return v; } else { return cache.get(id); } } else { final Object[] argsReady = features.willAllowId(id) ? ElementHelper.upsert(args, T.id, id) : args; final Vertex v = graphToWriteTo.addVertex(argsReady); cache.put(id, v); return v; } } private static Object typeCastValue(final String key, final String value, final Map<String, String> keyTypes) { final String type = keyTypes.get(key); if (null == type || type.equals(GraphMLTokens.STRING)) return value; else if (type.equals(GraphMLTokens.FLOAT)) return Float.valueOf(value); else if (type.equals(GraphMLTokens.INT)) return Integer.valueOf(value); else if (type.equals(GraphMLTokens.DOUBLE)) return Double.valueOf(value); else if (type.equals(GraphMLTokens.BOOLEAN)) return Boolean.valueOf(value); else if (type.equals(GraphMLTokens.LONG)) return Long.valueOf(value); else return value; } public static Builder build() { return new Builder(); } /** * Allows configuration and construction of the GraphMLReader instance. */ public static final class Builder implements ReaderBuilder<GraphMLReader> { private String edgeLabelKey = GraphMLTokens.LABEL_E; private String vertexLabelKey = GraphMLTokens.LABEL_V; private boolean strict = true; private long batchSize = 10000; private Builder() { } /** * When set to true, exceptions will be thrown if a property value cannot be coerced to the expected data * type. If set to false, then the reader will continue with the import but ignore the failed property key. * By default this value is "true". */ public Builder strict(final boolean strict) { this.strict = strict; return this; } /** * The key to use as the edge label. */ public Builder edgeLabelKey(final String edgeLabelKey) { this.edgeLabelKey = edgeLabelKey; return this; } /** * the key to use as the vertex label. */ public Builder vertexLabelKey(final String vertexLabelKey) { this.vertexLabelKey = vertexLabelKey; return this; } /** * Number of mutations to perform before a commit is executed. */ public Builder batchSize(final long batchSize) { this.batchSize = batchSize; return this; } public GraphMLReader create() { return new GraphMLReader(this); } } }