/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.examples.java.graph;

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.examples.java.graph.util.EnumTrianglesData;
import org.apache.flink.examples.java.graph.util.EnumTrianglesDataTypes.Edge;
import org.apache.flink.examples.java.graph.util.EnumTrianglesDataTypes.Triad;
import org.apache.flink.util.Collector;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Enumerates the triangles of a graph, i.e. every set of three vertices that are pairwise
 * connected by edges. Triangle enumeration is a pre-processing step to find closely connected
 * parts in graphs.
 *
 * <p>The algorithm works as follows:
 * <ol>
 * <li>Every edge is projected so that its first vertex id is not larger than its second one.
 * <li>Edges are grouped by their first vertex and every pair of edges within a group is combined
 *     into a triad, i.e., a triple of vertices that is connected by two edges.
 * <li>The triads are joined with the edges on the two non-shared vertices, so only triads for
 *     which a third, closing edge exists are kept.
 * </ol>
 *
 * <p>Input files are plain text files and must be formatted as follows:
 * <ul>
 * <li>Edges are represented as pairs of vertex IDs which are separated by space
 *     characters. Edges are separated by new-line characters.<br>
 *     For example <code>"1 2\n2 12\n1 12\n42 63"</code> gives four (undirected) edges
 *     (1)-(2), (2)-(12), (1)-(12), and (42)-(63) that include a triangle
 * </ul>
 * <pre>
 *     (1)
 *     /  \
 *   (2)-(12)
 * </pre>
 *
 * <p>Usage: <code>EnumTriangles --edges &lt;path&gt; --output &lt;path&gt;</code><br>
 * If no parameters are provided, the program is run with default data from
 * {@link EnumTrianglesData}.
 *
 * <p>This example shows how to use:
 * <ul>
 * <li>Custom Java objects which extend Tuple
 * <li>Group Sorting
 * </ul>
 */
@SuppressWarnings("serial")
public class EnumTriangles {

    // *************************************************************************
    //     PROGRAM
    // *************************************************************************

    /**
     * Assembles the triangle enumeration job and runs it.
     *
     * @param args command line arguments; {@code --edges} selects the input file and
     *             {@code --output} the result path, both are optional
     * @throws Exception if setting up or running the job fails
     */
    public static void main(String[] args) throws Exception {

        // parse the command line
        final ParameterTool params = ParameterTool.fromArgs(args);

        // obtain the execution environment
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // expose the parameters in the web interface
        env.getConfig().setGlobalJobParameters(params);

        // load the edges, either from a file or from the built-in sample data
        final DataSet<Edge> edges = loadEdges(env, params);

        // normalize every edge so that the smaller vertex id comes first
        final DataSet<Edge> edgesById = edges.map(new EdgeByIdProjector());

        // build triads from all edge pairs that share their first vertex
        final DataSet<Triad> triads = edgesById
                .groupBy(Edge.V1)
                .sortGroup(Edge.V2, Order.ASCENDING)
                .reduceGroup(new TriadBuilder());

        // keep only the triads for which a closing edge exists
        final DataSet<Triad> triangles = triads
                .join(edgesById)
                .where(Triad.V2, Triad.V3)
                .equalTo(Edge.V1, Edge.V2)
                .with(new TriadFilter());

        // without an output path the result is printed
        if (!params.has("output")) {
            System.out.println("Printing result to stdout. Use --output to specify output path.");
            triangles.print();
            return;
        }

        // otherwise write the result as CSV and trigger the execution explicitly
        triangles.writeAsCsv(params.get("output"), "\n", ",");
        env.execute("Basic Triangle Enumeration Example");
    }

    /**
     * Creates the edge data set: parsed from the file given by {@code --edges} when that
     * parameter is present, otherwise taken from {@link EnumTrianglesData}.
     */
    private static DataSet<Edge> loadEdges(ExecutionEnvironment env, ParameterTool params) {
        if (params.has("edges")) {
            final DataSet<Tuple2<Integer, Integer>> vertexPairs = env
                    .readCsvFile(params.get("edges"))
                    .fieldDelimiter(" ")
                    .includeFields(true, true)
                    .types(Integer.class, Integer.class);
            return vertexPairs.map(new TupleEdgeConverter());
        }

        System.out.println("Executing EnumTriangles example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        return EnumTrianglesData.getDefaultEdgeDataSet(env);
    }

    // *************************************************************************
    //     USER FUNCTIONS
    // *************************************************************************

    /**
     * Turns a pair of vertex ids into an {@link Edge}.
     * The same Edge instance is filled and returned on every call.
     */
    @ForwardedFields("0;1")
    public static class TupleEdgeConverter implements MapFunction<Tuple2<Integer, Integer>, Edge> {

        private final Edge reusableEdge = new Edge();

        @Override
        public Edge map(Tuple2<Integer, Integer> vertexPair) throws Exception {
            reusableEdge.copyVerticesFromTuple2(vertexPair);
            return reusableEdge;
        }
    }

    /**
     * Orders the two vertices of an edge such that the id of the first vertex is not larger
     * than the id of the second one. The incoming edge is modified in place and returned.
     */
    private static class EdgeByIdProjector implements MapFunction<Edge, Edge> {

        @Override
        public Edge map(Edge edge) throws Exception {
            // already in the desired order, nothing to do
            if (edge.getFirstVertex() <= edge.getSecondVertex()) {
                return edge;
            }

            edge.flipVertices();
            return edge;
        }
    }

    /**
     * Combines the edges of one group into triads (triples of vertices connected by two edges).
     * The shared vertex becomes the first vertex of each triad; the second and third vertex are
     * emitted in the order in which their edges were read.
     *
     * <p>Assumes that all input edges share the first vertex and arrive in ascending order of
     * the second vertex. The same Triad instance is updated and emitted for every result.
     */
    @ForwardedFields("0")
    private static class TriadBuilder implements GroupReduceFunction<Edge, Triad> {

        /** Second vertices of the edges read so far in the current group. */
        private final List<Integer> seenVertices = new ArrayList<Integer>();
        private final Triad reusableTriad = new Triad();

        @Override
        public void reduce(Iterable<Edge> edgesIter, Collector<Triad> out) throws Exception {

            // start with an empty list, it may still hold the vertices of the previous group
            seenVertices.clear();

            final Iterator<Edge> edgeIterator = edgesIter.iterator();

            // the first edge provides the shared vertex and the first neighbor
            Edge head = edgeIterator.next();
            reusableTriad.setFirstVertex(head.getFirstVertex());
            seenVertices.add(head.getSecondVertex());

            // pair every further neighbor with all neighbors read before it
            while (edgeIterator.hasNext()) {
                Integer currentVertexId = edgeIterator.next().getSecondVertex();

                for (int i = 0; i < seenVertices.size(); i++) {
                    reusableTriad.setSecondVertex(seenVertices.get(i));
                    reusableTriad.setThirdVertex(currentVertexId);
                    out.collect(reusableTriad);
                }

                seenVertices.add(currentVertexId);
            }
        }
    }

    /**
     * Join function used to drop triads without a closing third edge: it is applied to pairs of
     * a triad and an edge and simply passes the triad on.
     */
    private static class TriadFilter implements JoinFunction<Triad, Edge, Triad> {

        @Override
        public Triad join(Triad triad, Edge closingEdge) throws Exception {
            return triad;
        }
    }
}