/*
* This file is part of Gradoop.
*
* Gradoop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Gradoop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Gradoop. If not, see <http://www.gnu.org/licenses/>.
*/
package org.gradoop.examples.grouping;
import org.gradoop.examples.AbstractRunner;
import org.gradoop.flink.io.api.DataSink;
import org.gradoop.flink.io.api.DataSource;
import org.gradoop.flink.io.impl.csv.CSVDataSource;
import org.gradoop.flink.io.impl.dot.DOTDataSink;
import org.gradoop.flink.model.impl.LogicalGraph;
import org.gradoop.flink.model.impl.operators.grouping.GroupingStrategy;
import org.gradoop.flink.model.impl.operators.grouping.functions.aggregation.CountAggregator;
import org.gradoop.flink.model.impl.operators.grouping.functions.aggregation.MaxAggregator;
import org.gradoop.flink.model.impl.operators.grouping.functions.aggregation.MinAggregator;
import org.gradoop.flink.util.GradoopFlinkConfig;
import java.io.IOException;
import java.time.Instant;
import java.time.ZoneId;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
/**
 * Demo program that combines the grouping operator with
 *
 * (1) the subgraph operator to extract vertices and edges by specified predicate functions and
 * (2) the transformation operator to modify vertex properties which are used for grouping
 */
public class Composition extends AbstractRunner {

  /**
   * Loads a social network graph from the specified location, keeps only 'person' vertices and
   * 'knows' edges, derives the year and decade of birth for each vertex, groups the vertices by
   * decade and writes the grouped graph to the output path in DOT format.
   *
   * args[0] - input path (CSV)
   * args[1] - output path
   *
   * @param args arguments
   * @throws IllegalArgumentException if fewer than two arguments are given
   * @throws Exception if building or executing the dataflow fails
   */
  public static void main(String[] args) throws Exception {
    // fail with a usage hint instead of a bare ArrayIndexOutOfBoundsException
    if (args.length < 2) {
      throw new IllegalArgumentException(
        "Usage: Composition <input path (CSV)> <output path>");
    }
    String inputPath = args[0];
    String outputPath = args[1];

    // instantiate a default gradoop config
    GradoopFlinkConfig config = GradoopFlinkConfig.createConfig(getExecutionEnvironment());

    // define a data source to load the graph
    DataSource dataSource = new CSVDataSource(inputPath, config);

    // load the graph
    LogicalGraph socialNetwork = dataSource.getLogicalGraph();

    // use the subgraph operator to filter the graph
    LogicalGraph subgraph = socialNetwork.subgraph(
      v -> v.getLabel().equals("person"),
      e -> e.getLabel().equals("knows"));

    // use the transformation operator to classify the 'birthday' property for the users;
    // the property is read as a long and interpreted as milliseconds since the epoch
    LogicalGraph transformed = subgraph.transformVertices((current, modified) -> {
      int yearOfBirth = yearOf(current.getPropertyValue("birthday").getLong());
      current.setProperty("yob", yearOfBirth);
      // round down to the first year of the decade, e.g. 1985 -> 1980
      current.setProperty("decade", yearOfBirth - yearOfBirth % 10);
      return current;
    });

    // group the transformed graph by users decade and apply several aggregate functions
    LogicalGraph summary = transformed.groupBy(
      Collections.singletonList("decade"), Arrays.asList(
        new CountAggregator("count"),
        new MinAggregator("yob", "min_yob"),
        new MaxAggregator("yob", "max_yob")),
      Collections.emptyList(),
      Collections.singletonList(new CountAggregator("count")),
      GroupingStrategy.GROUP_COMBINE);

    // use the decade as label information for the DOT sink
    summary = summary.transformVertices((current, modified) -> {
      current.setLabel(current.getPropertyValue("decade").toString());
      return current;
    });

    // instantiate a data sink for the DOT format
    DataSink dataSink = new DOTDataSink(outputPath, false);
    dataSink.write(summary, true);

    // trigger the execution of the dataflow defined above
    getExecutionEnvironment().execute();
  }

  /**
   * Returns the calendar year of the given timestamp in the JVM's default time zone.
   *
   * Unlike the deprecated {@code java.util.Date#getYear()}, the result is the full year
   * (e.g. 1985) and not the year minus 1900 (e.g. 85).
   *
   * @param epochMillis milliseconds since 1970-01-01T00:00:00Z
   * @return calendar year of the timestamp
   */
  private static int yearOf(long epochMillis) {
    return Instant.ofEpochMilli(epochMillis).atZone(ZoneId.systemDefault()).getYear();
  }
}