/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.graph.library.clustering.undirected; import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.common.functions.GroupReduceFunction; import org.apache.flink.api.common.functions.JoinFunction; import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.common.operators.Order; import org.apache.flink.api.common.operators.base.JoinOperatorBase; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields; import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst; import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsSecond; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.api.java.tuple.Tuple3; import org.apache.flink.graph.Edge; import org.apache.flink.graph.Graph; import org.apache.flink.graph.asm.degree.annotate.undirected.EdgeDegreePair; import org.apache.flink.graph.library.clustering.undirected.TriangleListing.Result; import org.apache.flink.graph.asm.result.PrintableResult; import org.apache.flink.graph.asm.result.TertiaryResult; import org.apache.flink.graph.utils.proxy.GraphAlgorithmWrappingDataSet; import org.apache.flink.graph.utils.proxy.OptionalBoolean; import org.apache.flink.types.CopyableValue; import org.apache.flink.types.LongValue; import org.apache.flink.util.Collector; import org.apache.flink.util.Preconditions; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import static org.apache.flink.api.common.ExecutionConfig.PARALLELISM_DEFAULT; /** * Generates a listing of distinct triangles from the input graph. * <p> * A triangle is a 3-cycle with vertices A, B, and C connected by edges * (A, B), (A, C), and (B, C). * <p> * The input graph must be a simple, undirected graph containing no duplicate * edges or self-loops. * <p> * Algorithm from "Finding, Counting and Listing all Triangles in Large Graphs, * An Experimental Study", Thomas Schank and Dorothea Wagner. * http://i11www.iti.uni-karlsruhe.de/extra/publications/sw-fclt-05_t.pdf * * @param <K> graph ID type * @param <VV> vertex value type * @param <EV> edge value type */ public class TriangleListing<K extends Comparable<K> & CopyableValue<K>, VV, EV> extends GraphAlgorithmWrappingDataSet<K, VV, EV, Result<K>> { // Optional configuration private OptionalBoolean sortTriangleVertices = new OptionalBoolean(false, true); private int littleParallelism = PARALLELISM_DEFAULT; /** * Normalize the triangle listing such that for each result (K0, K1, K2) * the vertex IDs are sorted K0 < K1 < K2. * * @param sortTriangleVertices whether to output each triangle's vertices in sorted order * @return this */ public TriangleListing<K, VV, EV> setSortTriangleVertices(boolean sortTriangleVertices) { this.sortTriangleVertices.set(sortTriangleVertices); return this; } /** * Override the parallelism of operators processing small amounts of data. * * @param littleParallelism operator parallelism * @return this */ public TriangleListing<K, VV, EV> setLittleParallelism(int littleParallelism) { Preconditions.checkArgument(littleParallelism > 0 || littleParallelism == PARALLELISM_DEFAULT, "The parallelism must be greater than zero."); this.littleParallelism = littleParallelism; return this; } @Override protected String getAlgorithmName() { return TriangleListing.class.getName(); } @Override protected boolean mergeConfiguration(GraphAlgorithmWrappingDataSet other) { Preconditions.checkNotNull(other); if (! TriangleListing.class.isAssignableFrom(other.getClass())) { return false; } TriangleListing rhs = (TriangleListing) other; sortTriangleVertices.mergeWith(rhs.sortTriangleVertices); littleParallelism = (littleParallelism == PARALLELISM_DEFAULT) ? rhs.littleParallelism : ((rhs.littleParallelism == PARALLELISM_DEFAULT) ? littleParallelism : Math.min(littleParallelism, rhs.littleParallelism)); return true; } /* * Implementation notes: * * The requirement that "K extends CopyableValue<K>" can be removed when * Flink has a self-join and GenerateTriplets is implemented as such. * * ProjectTriangles should eventually be replaced by ".projectFirst("*")" * when projections use code generation. */ @Override public DataSet<Result<K>> runInternal(Graph<K, VV, EV> input) throws Exception { // u, v where u < v DataSet<Tuple2<K, K>> filteredByID = input .getEdges() .flatMap(new FilterByID<K, EV>()) .setParallelism(littleParallelism) .name("Filter by ID"); // u, v, (edge value, deg(u), deg(v)) DataSet<Edge<K, Tuple3<EV, LongValue, LongValue>>> pairDegree = input .run(new EdgeDegreePair<K, VV, EV>() .setParallelism(littleParallelism)); // u, v where deg(u) < deg(v) or (deg(u) == deg(v) and u < v) DataSet<Tuple2<K, K>> filteredByDegree = pairDegree .flatMap(new FilterByDegree<K, EV>()) .setParallelism(littleParallelism) .name("Filter by degree"); // u, v, w where (u, v) and (u, w) are edges in graph, v < w DataSet<Tuple3<K, K, K>> triplets = filteredByDegree .groupBy(0) .sortGroup(1, Order.ASCENDING) .reduceGroup(new GenerateTriplets<K>()) .name("Generate triplets"); // u, v, w where (u, v), (u, w), and (v, w) are edges in graph, v < w DataSet<Result<K>> triangles = triplets .join(filteredByID, JoinOperatorBase.JoinHint.REPARTITION_HASH_SECOND) .where(1, 2) .equalTo(0, 1) .with(new ProjectTriangles<K>()) .name("Triangle listing"); if (sortTriangleVertices.get()) { triangles = triangles .map(new SortTriangleVertices<K>()) .name("Sort triangle vertices"); } return triangles; } /** * Removes edge values while filtering such that only edges where the * source vertex ID compares less than the target vertex ID are emitted. * <p> * Since the input graph is a simple graph this filter removes exactly half * of the original edges. * * @param <T> ID type * @param <ET> edge value type */ @ForwardedFields("0; 1") private static final class FilterByID<T extends Comparable<T>, ET> implements FlatMapFunction<Edge<T, ET>, Tuple2<T, T>> { private Tuple2<T, T> edge = new Tuple2<>(); @Override public void flatMap(Edge<T, ET> value, Collector<Tuple2<T, T>> out) throws Exception { if (value.f0.compareTo(value.f1) < 0) { edge.f0 = value.f0; edge.f1 = value.f1; out.collect(edge); } } } /** * Removes edge values while filtering such that edges where the source * vertex has lower degree are emitted. If the source and target vertex * degrees are equal then the edge is emitted if the source vertex ID * compares less than the target vertex ID. * <p> * Since the input graph is a simple graph this filter removes exactly half * of the original edges. * * @param <T> ID type */ @ForwardedFields("0; 1") private static final class FilterByDegree<T extends Comparable<T>, ET> implements FlatMapFunction<Edge<T, Tuple3<ET, LongValue, LongValue>>, Tuple2<T, T>> { private Tuple2<T, T> edge = new Tuple2<>(); @Override public void flatMap(Edge<T, Tuple3<ET, LongValue, LongValue>> value, Collector<Tuple2<T, T>> out) throws Exception { Tuple3<ET, LongValue, LongValue> degrees = value.f2; long sourceDegree = degrees.f1.getValue(); long targetDegree = degrees.f2.getValue(); if (sourceDegree < targetDegree || (sourceDegree == targetDegree && value.f0.compareTo(value.f1) < 0)) { edge.f0 = value.f0; edge.f1 = value.f1; out.collect(edge); } } } /** * Generates the set of triplets by the pairwise enumeration of the open * neighborhood for each vertex. The number of triplets is quadratic in * the vertex degree; however, data skew is minimized by only generating * triplets from the vertex with least degree. * * @param <T> ID type */ @ForwardedFields("0") private static final class GenerateTriplets<T extends CopyableValue<T>> implements GroupReduceFunction<Tuple2<T, T>, Tuple3<T, T, T>> { private Tuple3<T, T, T> output = new Tuple3<>(); private List<T> visited = new ArrayList<>(); @Override public void reduce(Iterable<Tuple2<T, T>> values, Collector<Tuple3<T, T, T>> out) throws Exception { int visitedCount = 0; Iterator<Tuple2<T, T>> iter = values.iterator(); while (true) { Tuple2<T, T> edge = iter.next(); output.f0 = edge.f0; output.f2 = edge.f1; for (int i = 0; i < visitedCount; i++) { output.f1 = visited.get(i); out.collect(output); } if (! iter.hasNext()) { break; } if (visitedCount == visited.size()) { visited.add(edge.f1.copy()); } else { edge.f1.copyTo(visited.get(visitedCount)); } visitedCount += 1; } } } /** * Simply project the triplet as a triangle. * * @param <T> ID type */ @ForwardedFieldsFirst("0; 1; 2") @ForwardedFieldsSecond("0; 1") private static final class ProjectTriangles<T> implements JoinFunction<Tuple3<T, T, T>, Tuple2<T, T>, Result<T>> { private Result<T> output = new Result<>(); @Override public Result<T> join(Tuple3<T, T, T> triplet, Tuple2<T, T> edge) throws Exception { output.f0 = triplet.f0; output.f1 = triplet.f1; output.f2 = triplet.f2; return output; } } /** * Reorders the vertices of each emitted triangle (K0, K1, K2) * into sorted order such that K0 < K1 < K2. * * @param <T> ID type */ private static final class SortTriangleVertices<T extends Comparable<T>> implements MapFunction<Result<T>, Result<T>> { @Override public Result<T> map(Result<T> value) throws Exception { // by the triangle listing algorithm we know f1 < f2 if (value.f0.compareTo(value.f1) > 0) { T temp_val = value.f0; value.f0 = value.f1; if (temp_val.compareTo(value.f2) <= 0) { value.f1 = temp_val; } else { value.f1 = value.f2; value.f2 = temp_val; } } return value; } } /** * Wraps {@link Tuple3} to encapsulate results from the undirected Triangle Listing algorithm. * * @param <T> ID type */ public static class Result<T> extends Tuple3<T, T, T> implements PrintableResult, TertiaryResult<T> { @Override public T getVertexId0() { return f0; } @Override public void setVertexId0(T value) { f0 = value; } @Override public T getVertexId1() { return f1; } @Override public void setVertexId1(T value) { f1 = value; } @Override public T getVertexId2() { return f2; } @Override public void setVertexId2(T value) { f2 = value; } /** * Format values into a human-readable string. * * @return verbose string */ public String toPrintableString() { return "1st vertex ID: " + getVertexId0() + ", 2nd vertex ID: " + getVertexId1() + ", 3rd vertex ID: " + getVertexId2(); } } }