/* * This file is part of Gradoop. * * Gradoop is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Gradoop is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Gradoop. If not, see <http://www.gnu.org/licenses/>. */ package org.gradoop.examples.sna; import com.google.common.base.Preconditions; import org.apache.flink.api.common.ProgramDescription; import org.gradoop.examples.AbstractRunner; import org.gradoop.flink.model.impl.LogicalGraph; import org.gradoop.flink.model.impl.operators.aggregation.functions.count.EdgeCount; import org.gradoop.flink.model.impl.operators.aggregation.functions.count.VertexCount; import java.util.Arrays; /** * The benchmark program executes the following workflow: * * 1) Extract subgraph with: * - vertex predicate: must be of type 'Person' * - edge predicate: must be of type 'knows' * 2) Group the subgraph using the vertex attributes 'city' and 'gender' and * - count the number of vertices represented by each super vertex * - count the number of edges represented by each super edge * 3) Aggregate the grouped graph: * - add the total vertex count as new graph property * - add the total edge count as new graph property */ public class SNABenchmark1 extends AbstractRunner implements ProgramDescription { /** * Runs the example program. * * Need a (possibly HDFS) input directory that contains * - nodes.json * - edges.json * - graphs.json * * Needs a (possibly HDFS) output directory to write the resulting graph to. * * @param args args[0] = input dir, args[1] output dir * @throws Exception */ @SuppressWarnings({ "unchecked", "Duplicates" }) public static void main(String[] args) throws Exception { Preconditions.checkArgument( args.length == 2, "input dir and output dir required"); String inputDir = args[0]; String outputDir = args[1]; LogicalGraph epgmDatabase = readLogicalGraph(inputDir); LogicalGraph result = execute(epgmDatabase); writeLogicalGraph(result, outputDir); } /** * The actual computation. * * @param socialNetwork social network graph * @return summarized, aggregated graph */ private static LogicalGraph execute(LogicalGraph socialNetwork) { return socialNetwork .subgraph( vertex -> vertex.getLabel().equals("person"), edge -> edge.getLabel().equals("knows")) .groupBy(Arrays.asList("gender", "city")) .aggregate(new VertexCount()) .aggregate(new EdgeCount()); } @Override public String getDescription() { return SNABenchmark1.class.getName(); } }