/* * This file is part of Gradoop. * * Gradoop is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Gradoop is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Gradoop. If not, see <http://www.gnu.org/licenses/>. */ package org.gradoop.flink.model.impl; import org.apache.commons.lang.NotImplementedException; import org.apache.flink.api.common.functions.FilterFunction; import org.apache.flink.api.common.functions.GroupReduceFunction; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.api.java.tuple.Tuple3; import org.apache.flink.api.java.typeutils.TypeExtractor; import org.gradoop.common.model.impl.pojo.GraphElement; import org.gradoop.common.model.impl.pojo.GraphHead; import org.gradoop.common.model.impl.id.GradoopId; import org.gradoop.common.model.impl.id.GradoopIdList; import org.gradoop.common.model.impl.pojo.Edge; import org.gradoop.common.model.impl.pojo.Vertex; import org.gradoop.common.util.Order; import org.gradoop.flink.io.api.DataSink; import org.gradoop.flink.model.api.functions.GraphHeadReduceFunction; import org.gradoop.flink.model.api.operators.ApplicableUnaryGraphToGraphOperator; import org.gradoop.flink.model.api.operators.BinaryCollectionToCollectionOperator; import org.gradoop.flink.model.api.operators.GraphCollectionOperators; import org.gradoop.flink.model.api.operators.ReducibleBinaryGraphToGraphOperator; import org.gradoop.flink.model.api.operators.UnaryCollectionToCollectionOperator; import org.gradoop.flink.model.api.operators.UnaryCollectionToGraphOperator; import org.gradoop.flink.model.impl.functions.bool.Not; import org.gradoop.flink.model.impl.functions.bool.Or; import org.gradoop.flink.model.impl.functions.bool.True; import org.gradoop.flink.model.impl.functions.epgm.BySameId; import org.gradoop.flink.model.impl.functions.epgm.GraphElementExpander; import org.gradoop.flink.model.impl.functions.epgm.GraphTransactionTriple; import org.gradoop.flink.model.impl.functions.epgm.GraphVerticesEdges; import org.gradoop.flink.model.impl.functions.epgm.Id; import org.gradoop.flink.model.impl.functions.epgm.TransactionEdges; import org.gradoop.flink.model.impl.functions.epgm.TransactionFromSets; import org.gradoop.flink.model.impl.functions.epgm.TransactionGraphHead; import org.gradoop.flink.model.impl.functions.epgm.TransactionVertices; import org.gradoop.flink.model.impl.functions.graphcontainment.InAnyGraph; import org.gradoop.flink.model.impl.functions.graphcontainment.InGraph; import org.gradoop.flink.model.impl.functions.utils.Cast; import org.gradoop.flink.model.impl.functions.utils.First; import org.gradoop.flink.model.impl.operators.difference.Difference; import org.gradoop.flink.model.impl.operators.difference.DifferenceBroadcast; import org.gradoop.flink.model.impl.operators.distinction.DistinctById; import org.gradoop.flink.model.impl.operators.distinction.DistinctByIsomorphism; import org.gradoop.flink.model.impl.operators.distinction.GroupByIsomorphism; import org.gradoop.flink.model.impl.operators.equality.CollectionEquality; import org.gradoop.flink.model.impl.operators.equality.CollectionEqualityByGraphIds; import org.gradoop.flink.model.impl.operators.intersection.Intersection; import org.gradoop.flink.model.impl.operators.intersection.IntersectionBroadcast; import org.gradoop.flink.model.impl.operators.limit.Limit; import org.gradoop.flink.model.impl.operators.matching.transactional.algorithm.PatternMatchingAlgorithm; import org.gradoop.flink.model.impl.operators.matching.transactional.TransactionalPatternMatching; import org.gradoop.flink.model.impl.operators.selection.Selection; import org.gradoop.flink.model.impl.operators.tostring.functions.EdgeToDataString; import org.gradoop.flink.model.impl.operators.tostring.functions.EdgeToIdString; import org.gradoop.flink.model.impl.operators.tostring.functions.GraphHeadToDataString; import org.gradoop.flink.model.impl.operators.tostring.functions.GraphHeadToEmptyString; import org.gradoop.flink.model.impl.operators.tostring.functions.VertexToDataString; import org.gradoop.flink.model.impl.operators.tostring.functions.VertexToIdString; import org.gradoop.flink.model.impl.operators.union.Union; import org.gradoop.flink.representation.transactional.GraphTransaction; import org.gradoop.flink.util.GradoopFlinkConfig; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Set; import static org.apache.flink.shaded.com.google.common.base.Preconditions.checkNotNull; /** * Represents a collection of graphs inside the EPGM. As graphs may share * vertices and edges, the collections contains a single gelly graph * representing all subgraphs. Graph data is stored in an additional dataset. */ public class GraphCollection extends GraphBase implements GraphCollectionOperators { /** * Creates a graph collection from the given arguments. * * @param graphHeads graph heads * @param vertices vertices * @param edges edges * @param config Gradoop Flink configuration */ private GraphCollection(DataSet<GraphHead> graphHeads, DataSet<Vertex> vertices, DataSet<Edge> edges, GradoopFlinkConfig config) { super(graphHeads, vertices, edges, config); } //---------------------------------------------------------------------------- // Factory methods //---------------------------------------------------------------------------- /** * Creates an empty graph collection. * * @param config Gradoop Flink configuration * @return empty graph collection */ public static GraphCollection createEmptyCollection( GradoopFlinkConfig config) { Collection<GraphHead> graphHeads = new ArrayList<>(); Collection<Vertex> vertices = new ArrayList<>(); Collection<Edge> edges = new ArrayList<>(); return GraphCollection.fromCollections(graphHeads, vertices, edges, config); } /** * Creates a graph collection from the given arguments. * * @param graphHeads GraphHead DataSet * @param vertices Vertex DataSet * @param config Gradoop Flink configuration * @return Graph collection */ public static GraphCollection fromDataSets(DataSet<GraphHead> graphHeads, DataSet<Vertex> vertices, GradoopFlinkConfig config) { return fromDataSets( graphHeads, vertices, createEdgeDataSet(new ArrayList<>(0), config), config ); } /** * Creates a graph collection from the given arguments. * * @param graphHeads GraphHead DataSet * @param vertices Vertex DataSet * @param edges Edge DataSet * @param config Gradoop Flink configuration * @return Graph collection */ public static GraphCollection fromDataSets(DataSet<GraphHead> graphHeads, DataSet<Vertex> vertices, DataSet<Edge> edges, GradoopFlinkConfig config) { checkNotNull(graphHeads, "GraphHead DataSet was null"); checkNotNull(vertices, "Vertex DataSet was null"); checkNotNull(edges, "Edge DataSet was null"); checkNotNull(config, "Config was null"); return new GraphCollection(graphHeads, vertices, edges, config); } /** * Creates a new graph collection from the given collection. * * @param graphHeads Graph Head collection * @param vertices Vertex collection * @param edges Edge collection * @param config Gradoop Flink configuration * @return Graph collection */ public static GraphCollection fromCollections( Collection<GraphHead> graphHeads, Collection<Vertex> vertices, Collection<Edge> edges, GradoopFlinkConfig config) { checkNotNull(graphHeads, "GraphHead collection was null"); checkNotNull(vertices, "Vertex collection was null"); checkNotNull(edges, "Vertex collection was null"); checkNotNull(config, "Config was null"); return fromDataSets( createGraphHeadDataSet(graphHeads, config), createVertexDataSet(vertices, config), createEdgeDataSet(edges, config), config ); } /** * Creates a graph collection from a given logical graph. * * @param logicalGraph input graph * @return 1-element graph collection */ public static GraphCollection fromGraph(LogicalGraph logicalGraph) { return fromDataSets( logicalGraph.getGraphHead(), logicalGraph.getVertices(), logicalGraph.getEdges(), logicalGraph.getConfig() ); } //---------------------------------------------------------------------------- // Logical Graph / Graph Head Getters //---------------------------------------------------------------------------- /** * {@inheritDoc} */ public DataSet<GraphHead> getGraphHeads() { return super.getGraphHeads(); } /** * {@inheritDoc} */ @Override public LogicalGraph getGraph(final GradoopId graphID) { // filter vertices and edges based on given graph id DataSet<GraphHead> graphHead = getGraphHeads() .filter(new BySameId<>(graphID)); DataSet<Vertex> vertices = getVertices() .filter(new InGraph<>(graphID)); DataSet<Edge> edges = getEdges() .filter(new InGraph<>(graphID)); return LogicalGraph.fromDataSets(graphHead, vertices, edges, getConfig()); } /** * {@inheritDoc} */ @Override public GraphCollection getGraphs(final GradoopId... identifiers) { GradoopIdList graphIds = new GradoopIdList(); for (GradoopId id : identifiers) { graphIds.add(id); } return getGraphs(graphIds); } /** * {@inheritDoc} */ @Override public GraphCollection getGraphs(final GradoopIdList identifiers) { DataSet<GraphHead> newGraphHeads = this.getGraphHeads() .filter(new FilterFunction<GraphHead>() { @Override public boolean filter(GraphHead graphHead) throws Exception { return identifiers.contains(graphHead.getId()); } }); // build new vertex set DataSet<Vertex> vertices = getVertices() .filter(new InAnyGraph<>(identifiers)); // build new edge set DataSet<Edge> edges = getEdges() .filter(new InAnyGraph<>(identifiers)); return new GraphCollection(newGraphHeads, vertices, edges, getConfig()); } //---------------------------------------------------------------------------- // Unary Operators //---------------------------------------------------------------------------- /** * {@inheritDoc} */ @Override public GraphCollection select(final FilterFunction<GraphHead> predicate) { return callForCollection(new Selection(predicate)); } /** * {@inheritDoc} */ @Override public GraphCollection sortBy(String propertyKey, Order order) { throw new NotImplementedException(); } /** * {@inheritDoc} */ @Override public GraphCollection limit(int n) { return callForCollection(new Limit(n)); } /** * {@inheritDoc} */ @Override public GraphCollection match( String pattern, PatternMatchingAlgorithm algorithm, boolean returnEmbeddings) { return new TransactionalPatternMatching( pattern, algorithm, returnEmbeddings).execute(this); } //---------------------------------------------------------------------------- // Binary Operators //---------------------------------------------------------------------------- /** * {@inheritDoc} */ @Override public GraphCollection union(GraphCollection otherCollection) { return callForCollection(new Union(), otherCollection); } /** * {@inheritDoc} */ @Override public GraphCollection intersect(GraphCollection otherCollection) { return callForCollection(new Intersection(), otherCollection); } /** * {@inheritDoc} */ @Override public GraphCollection intersectWithSmallResult( GraphCollection otherCollection) { return callForCollection(new IntersectionBroadcast(), otherCollection); } /** * {@inheritDoc} */ @Override public GraphCollection difference(GraphCollection otherCollection) { return callForCollection(new Difference(), otherCollection); } /** * {@inheritDoc} */ @Override public GraphCollection differenceWithSmallResult( GraphCollection otherCollection) { return callForCollection(new DifferenceBroadcast(), otherCollection); } /** * {@inheritDoc} */ @Override public DataSet<Boolean> equalsByGraphIds(GraphCollection other) { return new CollectionEqualityByGraphIds().execute(this, other); } /** * {@inheritDoc} */ @Override public DataSet<Boolean> equalsByGraphElementIds(GraphCollection other) { return new CollectionEquality( new GraphHeadToEmptyString(), new VertexToIdString(), new EdgeToIdString(), true).execute(this, other); } /** * {@inheritDoc} */ @Override public DataSet<Boolean> equalsByGraphElementData(GraphCollection other) { return new CollectionEquality( new GraphHeadToEmptyString(), new VertexToDataString(), new EdgeToDataString(), true).execute(this, other); } /** * {@inheritDoc} */ @Override public DataSet<Boolean> equalsByGraphData(GraphCollection other) { return new CollectionEquality( new GraphHeadToDataString(), new VertexToDataString(), new EdgeToDataString(), true).execute(this, other); } //---------------------------------------------------------------------------- // Auxiliary Operators //---------------------------------------------------------------------------- /** * {@inheritDoc} */ @Override public GraphCollection callForCollection( UnaryCollectionToCollectionOperator op) { return op.execute(this); } /** * {@inheritDoc} */ @Override public GraphCollection callForCollection( BinaryCollectionToCollectionOperator op, GraphCollection otherCollection) { return op.execute(this, otherCollection); } /** * {@inheritDoc} */ @Override public LogicalGraph callForGraph(UnaryCollectionToGraphOperator op) { return op.execute(this); } /** * {@inheritDoc} */ @Override public GraphCollection apply(ApplicableUnaryGraphToGraphOperator op) { return callForCollection(op); } /** * {@inheritDoc} */ @Override public LogicalGraph reduce(ReducibleBinaryGraphToGraphOperator op) { return callForGraph(op); } //---------------------------------------------------------------------------- // Utility methods //---------------------------------------------------------------------------- /** * {@inheritDoc} */ @Override public DataSet<Boolean> isEmpty() { return getGraphHeads() .map(new True<>()) .distinct() .union(getConfig().getExecutionEnvironment().fromElements(false)) .reduce(new Or()) .map(new Not()); } /** * Creates a graph collection from a graph transaction dataset. * Overlapping vertices and edge are merged by Id comparison only. * * @param transactions transaction dataset * @return graph collection */ public static GraphCollection fromTransactions(GraphTransactions transactions) { GroupReduceFunction<Vertex, Vertex> vertexReducer = new First<>(); GroupReduceFunction<Edge, Edge> edgeReducer = new First<>(); return fromTransactions(transactions, vertexReducer, edgeReducer); } /** * Creates a graph collection from a graph transaction dataset. * Overlapping vertices and edge are merged using provided reduce functions. * * @param transactions transaction dataset * @param vertexMergeReducer vertex merge function * @param edgeMergeReducer edge merge function * @return graph collection */ public static GraphCollection fromTransactions( GraphTransactions transactions, GroupReduceFunction<Vertex, Vertex> vertexMergeReducer, GroupReduceFunction<Edge, Edge> edgeMergeReducer) { GradoopFlinkConfig config = transactions.getConfig(); DataSet<Tuple3<GraphHead, Set<Vertex>, Set<Edge>>> triples = transactions .getTransactions() .map(new GraphTransactionTriple()); DataSet<GraphHead> graphHeads = triples.map(new TransactionGraphHead()); DataSet<Vertex> vertices = triples .flatMap(new TransactionVertices()) .groupBy(new Id<>()) .reduceGroup(vertexMergeReducer); DataSet<Edge> edges = triples .flatMap(new TransactionEdges()) .groupBy(new Id<>()) .reduceGroup(edgeMergeReducer); return fromDataSets(graphHeads, vertices, edges, config); } /** * {@inheritDoc} */ @Override public GraphTransactions toTransactions() { DataSet<Tuple2<GradoopId, GraphElement>> vertices = getVertices() .map(new Cast<>(GraphElement.class)) .returns(TypeExtractor.getForClass(GraphElement.class)) .flatMap(new GraphElementExpander<>()); DataSet<Tuple2<GradoopId, GraphElement>> edges = getEdges() .map(new Cast<>(GraphElement.class)) .returns(TypeExtractor.getForClass(GraphElement.class)) .flatMap(new GraphElementExpander<>()); DataSet<Tuple3<GradoopId, Set<Vertex>, Set<Edge>>> transactions = vertices .union(edges) .groupBy(0) .combineGroup(new GraphVerticesEdges()) .groupBy(0) .reduceGroup(new GraphVerticesEdges()); DataSet<GraphTransaction> graphTransactions = getGraphHeads() .leftOuterJoin(transactions) .where(new Id<>()).equalTo(0) .with(new TransactionFromSets()); return new GraphTransactions(graphTransactions, getConfig()); } /** * {@inheritDoc} */ @Override public GraphCollection distinctById() { return callForCollection(new DistinctById()); } /** * {@inheritDoc} */ @Override public GraphCollection distinctByIsomorphism() { return callForCollection(new DistinctByIsomorphism()); } @Override public GraphCollection groupByIsomorphism(GraphHeadReduceFunction func) { return callForCollection(new GroupByIsomorphism(func)); } /** * {@inheritDoc} */ @Override public void writeTo(DataSink dataSink) throws IOException { dataSink.write(this); } }