/*
* This file is part of Gradoop.
*
* Gradoop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Gradoop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Gradoop. If not, see <http://www.gnu.org/licenses/>.
*/
package org.gradoop.flink.algorithms.btgs;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.graph.Graph;
import org.apache.flink.types.NullValue;
import org.gradoop.flink.algorithms.btgs.functions.MasterData;
import org.gradoop.flink.algorithms.btgs.functions.NewBtgGraphHead;
import org.gradoop.common.model.impl.pojo.Edge;
import org.gradoop.common.model.impl.pojo.GraphHead;
import org.gradoop.common.model.impl.pojo.Vertex;
import org.gradoop.flink.model.api.operators.UnaryGraphToCollectionOperator;
import org.gradoop.flink.model.impl.GraphCollection;
import org.gradoop.flink.model.impl.LogicalGraph;
import org.gradoop.flink.algorithms.btgs.functions.ComponentToNewBtgId;
import org.gradoop.flink.algorithms.btgs.functions.BtgMessenger;
import org.gradoop.flink.algorithms.btgs.functions.BtgUpdater;
import org.gradoop.flink.algorithms.btgs.functions.ToGellyVertexWithIdValue;
import org.gradoop.flink.algorithms.btgs.functions.CollectGradoopIds;
import org.gradoop.flink.algorithms.btgs.functions.SetBtgId;
import org.gradoop.flink.algorithms.btgs.functions.SetBtgIds;
import org.gradoop.flink.algorithms.btgs.functions.TargetIdBtgId;
import org.gradoop.flink.algorithms.btgs.functions.TransactionalData;
import org.gradoop.flink.model.impl.functions.epgm.ExpandGradoopIds;
import org.gradoop.flink.model.impl.functions.epgm.ToGellyEdgeWithNullValue;
import org.gradoop.flink.model.impl.functions.epgm.Id;
import org.gradoop.flink.model.impl.functions.epgm.SourceId;
import org.gradoop.flink.model.impl.functions.utils.LeftSide;
import org.gradoop.flink.model.impl.functions.tuple.SwitchPair;
import org.gradoop.flink.model.impl.functions.tuple.Value0Of2;
import org.gradoop.common.model.impl.id.GradoopId;
import org.gradoop.common.model.impl.id.GradoopIdList;
/**
 * Part of the BIIIG approach.
 * Vertex-centric implementation to isolate business transaction graphs (BTGs).
 *
 * The operator takes a single logical graph, splits its vertices into master
 * data and transactional data, groups the transactional vertices by the value
 * computed in a Gelly scatter-gather iteration and returns one graph per group.
 */
public class BusinessTransactionGraphs implements
  UnaryGraphToCollectionOperator {

  /**
   * reserved property key referring to master or transactional data
   */
  public static final String SUPERTYPE_KEY = "superType";
  /**
   * reserved property value to mark master data
   */
  public static final String SUPERCLASS_VALUE_MASTER = "M";
  /**
   * reserved property value to mark transactional data
   */
  public static final String SUPERCLASS_VALUE_TRANSACTIONAL = "T";
  /**
   * reserved label to mark business transaction graphs
   */
  public static final String BTG_LABEL = "BusinessTransactionGraph";
  /**
   * reserved property key referring to the source identifier of vertices
   */
  public static final String SOURCEID_KEY = "sid";
  /**
   * maximum number of supersteps of the scatter-gather iteration
   */
  private static final int MAX_ITERATIONS = 100;

  /**
   * Extracts business transaction graphs from the given graph.
   *
   * @param iig input graph containing master and transactional data
   * @return collection with one graph per group of transactional vertices;
   *         vertices are the union of the updated transactional and master
   *         vertices, edges are those whose source vertex was assigned to a
   *         group
   */
  @Override
  public GraphCollection execute(LogicalGraph iig) {

    // split vertices into master data and transactional data
    DataSet<Vertex> masterVertices = iig.getVertices()
      .filter(new MasterData<>());

    LogicalGraph transGraph = iig
      .vertexInducedSubgraph(new TransactionalData<>());

    DataSet<Vertex> transVertices = transGraph
      .getVertices();

    // convert the transactional subgraph to a Gelly graph
    DataSet<org.apache.flink.graph.Edge<GradoopId, NullValue>> transEdges =
      transGraph.getEdges().map(new ToGellyEdgeWithNullValue());

    Graph<GradoopId, GradoopId, NullValue> gellyTransGraph = Graph.fromDataSet(
      transVertices.map(new ToGellyVertexWithIdValue()),
      transEdges,
      iig.getConfig().getExecutionEnvironment()
    );

    // run the iteration on the undirected graph so that messages are sent
    // along edges in both directions
    gellyTransGraph = gellyTransGraph
      .getUndirected()
      .runScatterGatherIteration(
        new BtgMessenger(), new BtgUpdater(), MAX_ITERATIONS);

    // group vertex ids by their final vertex value and map each group to a
    // new BTG id
    DataSet<Tuple2<GradoopId, GradoopIdList>> btgVerticesMap = gellyTransGraph
      .getVerticesAsTuple2()
      .map(new SwitchPair<>())
      .groupBy(0)
      .reduceGroup(new CollectGradoopIds())
      .map(new ComponentToNewBtgId());

    // expand the id lists to single pairs and switch both fields
    // NOTE(review): presumably yields (vertex id, BTG id) pairs - confirm
    // against ExpandGradoopIds
    DataSet<Tuple2<GradoopId, GradoopId>> vertexBtgMap = btgVerticesMap
      .flatMap(new ExpandGradoopIds<>())
      .map(new SwitchPair<>());

    // create one graph head per BTG id
    DataSet<GraphHead> graphHeads = btgVerticesMap
      .map(new Value0Of2<>())
      .map(new NewBtgGraphHead<>(iig.getConfig().getGraphHeadFactory()));

    // filter and update edges; the inner join drops every edge whose source
    // vertex has no entry in the vertex-BTG map
    DataSet<Edge> btgEdges = iig.getEdges()
      .join(vertexBtgMap)
      .where(new SourceId<>()).equalTo(0)
      .with(new SetBtgId<>());

    // update transactional vertices
    transVertices = transVertices
      .join(vertexBtgMap)
      .where(new Id<>()).equalTo(0)
      .with(new SetBtgId<>());

    // create master data BTG map: keep the pairs derived from BTG edges whose
    // first field matches the id of a master data vertex
    vertexBtgMap = btgEdges
      .map(new TargetIdBtgId<>())
      .join(masterVertices)
      .where(0).equalTo(new Id<>())
      .with(new LeftSide<>())
      .distinct();

    // collect all BTG ids per master data vertex
    DataSet<Tuple2<GradoopId, GradoopIdList>> vertexBtgsMap = vertexBtgMap
      .groupBy(0)
      .reduceGroup(new CollectGradoopIds());

    // update master data vertices; master vertices without any entry in the
    // map are dropped by the inner join
    masterVertices = masterVertices.join(vertexBtgsMap)
      .where(new Id<>()).equalTo(0)
      .with(new SetBtgIds<>());

    return GraphCollection.fromDataSets(
      graphHeads,
      transVertices.union(masterVertices),
      btgEdges,
      iig.getConfig()
    );
  }

  /**
   * Returns the name of this operator.
   *
   * @return fully qualified class name of the operator
   */
  @Override
  public String getName() {
    return BusinessTransactionGraphs.class.getName();
  }
}