/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.graph.library.clustering.directed; import org.apache.commons.lang3.builder.EqualsBuilder; import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.flink.api.common.accumulators.LongCounter; import org.apache.flink.graph.AbstractGraphAnalytic; import org.apache.flink.graph.AnalyticHelper; import org.apache.flink.graph.Graph; import org.apache.flink.graph.Vertex; import org.apache.flink.graph.asm.degree.annotate.directed.VertexDegrees; import org.apache.flink.graph.asm.degree.annotate.directed.VertexDegrees.Degrees; import org.apache.flink.graph.asm.result.PrintableResult; import org.apache.flink.graph.library.clustering.directed.TriadicCensus.Result; import org.apache.flink.types.CopyableValue; import org.apache.flink.util.Preconditions; import java.io.IOException; import java.math.BigInteger; import java.text.NumberFormat; import static org.apache.flink.api.common.ExecutionConfig.PARALLELISM_DEFAULT; /** * A triad is formed by three connected or unconnected vertices in a graph. * The triadic census counts the occurrences of each type of triad. * <p> * http://vlado.fmf.uni-lj.si/pub/networks/doc/triads/triads.pdf * * @param <K> graph ID type * @param <VV> vertex value type * @param <EV> edge value type */ public class TriadicCensus<K extends Comparable<K> & CopyableValue<K>, VV, EV> extends AbstractGraphAnalytic<K, VV, EV, Result> { private TriangleListingHelper<K> triangleListingHelper; private VertexDegreesHelper<K> vertexDegreesHelper; // Optional configuration private int littleParallelism = PARALLELISM_DEFAULT; /** * Override the parallelism of operators processing small amounts of data. * * @param littleParallelism operator parallelism * @return this */ public TriadicCensus<K, VV, EV> setLittleParallelism(int littleParallelism) { this.littleParallelism = littleParallelism; return this; } @Override public TriadicCensus<K, VV, EV> run(Graph<K, VV, EV> input) throws Exception { super.run(input); triangleListingHelper = new TriangleListingHelper<>(); input .run(new TriangleListing<K, VV, EV>() .setLittleParallelism(littleParallelism)) .output(triangleListingHelper) .name("Triangle counts"); vertexDegreesHelper = new VertexDegreesHelper<>(); input .run(new VertexDegrees<K, VV, EV>() .setParallelism(littleParallelism)) .output(vertexDegreesHelper) .name("Edge and triplet counts"); return this; } @Override public Result getResult() { BigInteger one = BigInteger.ONE; BigInteger two = BigInteger.valueOf(2); BigInteger three = BigInteger.valueOf(3); BigInteger six = BigInteger.valueOf(6); BigInteger vertexCount = BigInteger.valueOf((Long)vertexDegreesHelper.getAccumulator(env, "vc")); BigInteger unidirectionalEdgeCount = BigInteger.valueOf((Long)vertexDegreesHelper.getAccumulator(env, "uec") / 2); BigInteger bidirectionalEdgeCount = BigInteger.valueOf((Long)vertexDegreesHelper.getAccumulator(env, "bec") / 2); BigInteger triplet021dCount = BigInteger.valueOf((Long)vertexDegreesHelper.getAccumulator(env, "021d")); BigInteger triplet021uCount = BigInteger.valueOf((Long)vertexDegreesHelper.getAccumulator(env, "021u")); BigInteger triplet021cCount = BigInteger.valueOf((Long)vertexDegreesHelper.getAccumulator(env, "021c")); BigInteger triplet111dCount = BigInteger.valueOf((Long)vertexDegreesHelper.getAccumulator(env, "111d")); BigInteger triplet111uCount = BigInteger.valueOf((Long)vertexDegreesHelper.getAccumulator(env, "111u")); BigInteger triplet201Count = BigInteger.valueOf((Long)vertexDegreesHelper.getAccumulator(env, "201")); // triads with three connecting edges = closed triplet = triangle BigInteger triangle030tCount = BigInteger.valueOf((Long)triangleListingHelper.getAccumulator(env, "030t")); BigInteger triangle030cCount = BigInteger.valueOf((Long)triangleListingHelper.getAccumulator(env, "030c")); BigInteger triangle120dCount = BigInteger.valueOf((Long)triangleListingHelper.getAccumulator(env, "120d")); BigInteger triangle120uCount = BigInteger.valueOf((Long)triangleListingHelper.getAccumulator(env, "120u")); BigInteger triangle120cCount = BigInteger.valueOf((Long)triangleListingHelper.getAccumulator(env, "120c")); BigInteger triangle210Count = BigInteger.valueOf((Long)triangleListingHelper.getAccumulator(env, "210")); BigInteger triangle300Count = BigInteger.valueOf((Long)triangleListingHelper.getAccumulator(env, "300")); // triads with two connecting edges = open triplet; // each triangle deducts the count of three triplets triplet201Count = triplet201Count.subtract(triangle300Count.multiply(three)); triplet201Count = triplet201Count.subtract(triangle210Count); triplet111dCount = triplet111dCount.subtract(triangle210Count); triplet111uCount = triplet111uCount.subtract(triangle210Count); triplet111dCount = triplet111dCount.subtract(triangle120cCount); triplet111uCount = triplet111uCount.subtract(triangle120cCount); triplet021cCount = triplet021cCount.subtract(triangle120cCount); triplet111uCount = triplet111uCount.subtract(triangle120uCount.multiply(two)); triplet021uCount = triplet021uCount.subtract(triangle120uCount); triplet111dCount = triplet111dCount.subtract(triangle120dCount.multiply(two)); triplet021dCount = triplet021dCount.subtract(triangle120dCount); triplet021cCount = triplet021cCount.subtract(triangle030cCount.multiply(three)); triplet021cCount = triplet021cCount.subtract(triangle030tCount); triplet021uCount = triplet021uCount.subtract(triangle030tCount); triplet021dCount = triplet021dCount.subtract(triangle030tCount); // triads with one connecting edge; each edge pairs with `vertex count - 2` vertices; // each triangle deducts from three and each open triplet from two edges BigInteger edge102 = bidirectionalEdgeCount .multiply(vertexCount.subtract(two)) .subtract(triplet111dCount) .subtract(triplet111uCount) .subtract(triplet201Count.multiply(two)) .subtract(triangle120dCount) .subtract(triangle120uCount) .subtract(triangle120cCount) .subtract(triangle210Count.multiply(two)) .subtract(triangle300Count.multiply(three)); BigInteger edge012 = unidirectionalEdgeCount .multiply(vertexCount.subtract(two)) .subtract(triplet021dCount.multiply(two)) .subtract(triplet021uCount.multiply(two)) .subtract(triplet021cCount.multiply(two)) .subtract(triplet111dCount) .subtract(triplet111uCount) .subtract(triangle030tCount.multiply(three)) .subtract(triangle030cCount.multiply(three)) .subtract(triangle120dCount.multiply(two)) .subtract(triangle120uCount.multiply(two)) .subtract(triangle120cCount.multiply(two)) .subtract(triangle210Count); // triads with zero connecting edges; // (vertex count choose 3) minus earlier counts BigInteger triad003 = vertexCount .multiply(vertexCount.subtract(one)) .multiply(vertexCount.subtract(two)) .divide(six) .subtract(edge012) .subtract(edge102) .subtract(triplet021dCount) .subtract(triplet021uCount) .subtract(triplet021cCount) .subtract(triplet111dCount) .subtract(triplet111uCount) .subtract(triangle030tCount) .subtract(triangle030cCount) .subtract(triplet201Count) .subtract(triangle120dCount) .subtract(triangle120uCount) .subtract(triangle120cCount) .subtract(triangle210Count) .subtract(triangle300Count); return new Result(triad003, edge012, edge102, triplet021dCount, triplet021uCount, triplet021cCount, triplet111dCount, triplet111uCount, triangle030tCount, triangle030cCount, triplet201Count, triangle120dCount, triangle120uCount, triangle120cCount, triangle210Count, triangle300Count); } /** * Helper class to collect triadic census metrics from the triangle listing. * * @param <T> ID type */ private static class TriangleListingHelper<T> extends AnalyticHelper<TriangleListing.Result<T>> { private long[] triangleCount = new long[64]; @Override public void writeRecord(TriangleListing.Result<T> record) throws IOException { triangleCount[record.f3.getValue()]++; } @Override public void close() throws IOException { // see table from Batagelj and Mrvar, "A subquadratic triad census algorithm for large // sparse networks with small maximum degree" (this Flink algorithm does not use their // algorithm as we do not assume a small maximum degree) int[] typeTable = new int[]{ 1, 2, 2, 3, 2, 4, 6, 8, 2, 6, 5, 7, 3, 8, 7, 11, 2, 6, 4, 8, 5, 9, 9, 13, 6, 10, 9, 14, 7, 14, 12, 15, 2, 5, 6, 7, 6, 9, 10, 14, 4, 9, 9, 12, 8, 13, 14, 15, 3, 7, 8, 11, 7, 12, 14, 15, 8, 14, 13, 15, 11, 15, 15, 16}; long triangle030tCount = 0; long triangle030cCount = 0; long triangle120dCount = 0; long triangle120uCount = 0; long triangle120cCount = 0; long triangle210Count = 0; long triangle300tCount = 0; for (int i = 0 ; i < typeTable.length ; i++) { if (typeTable[i] == 9) { triangle030tCount += triangleCount[i]; } else if (typeTable[i] == 10) { triangle030cCount += triangleCount[i]; } else if (typeTable[i] == 12) { triangle120dCount += triangleCount[i]; } else if (typeTable[i] == 13) { triangle120uCount += triangleCount[i]; } else if (typeTable[i] == 14) { triangle120cCount += triangleCount[i]; } else if (typeTable[i] == 15) { triangle210Count += triangleCount[i]; } else if (typeTable[i] == 16) { triangle300tCount += triangleCount[i]; } else { assert triangleCount[i] == 0; } } addAccumulator("030t", new LongCounter(triangle030tCount)); addAccumulator("030c", new LongCounter(triangle030cCount)); addAccumulator("120d", new LongCounter(triangle120dCount)); addAccumulator("120u", new LongCounter(triangle120uCount)); addAccumulator("120c", new LongCounter(triangle120cCount)); addAccumulator("210", new LongCounter(triangle210Count)); addAccumulator("300", new LongCounter(triangle300tCount)); } } /** * Helper class to collect triadic census metrics from vertex degrees. * * @param <T> ID type */ private static class VertexDegreesHelper<T> extends AnalyticHelper<Vertex<T, Degrees>> { private long vertexCount; private long unidirectionalEdgeCount; private long bidirectionalEdgeCount; private long triplet021dCount; private long triplet021uCount; private long triplet021cCount; private long triplet111dCount; private long triplet111uCount; private long triplet201Count; @Override public void writeRecord(Vertex<T, Degrees> record) throws IOException { long degree = record.f1.getDegree().getValue(); long outDegree = record.f1.getOutDegree().getValue(); long inDegree = record.f1.getInDegree().getValue(); long unidirectionalEdgesAsSource = degree - inDegree; long unidirectionalEdgesAsTarget = degree - outDegree; long bidirectionalEdges = inDegree + outDegree - degree; vertexCount++; unidirectionalEdgeCount += unidirectionalEdgesAsSource + unidirectionalEdgesAsTarget; bidirectionalEdgeCount += bidirectionalEdges; triplet021dCount += unidirectionalEdgesAsSource * (unidirectionalEdgesAsSource - 1) / 2; triplet021uCount += unidirectionalEdgesAsTarget * (unidirectionalEdgesAsTarget - 1) / 2; triplet021cCount += unidirectionalEdgesAsSource * unidirectionalEdgesAsTarget; triplet111dCount += unidirectionalEdgesAsTarget * bidirectionalEdges; triplet111uCount += unidirectionalEdgesAsSource * bidirectionalEdges; triplet201Count += bidirectionalEdges * (bidirectionalEdges - 1) / 2; } @Override public void close() throws IOException { addAccumulator("vc", new LongCounter(vertexCount)); addAccumulator("uec", new LongCounter(unidirectionalEdgeCount)); addAccumulator("bec", new LongCounter(bidirectionalEdgeCount)); addAccumulator("021d", new LongCounter(triplet021dCount)); addAccumulator("021u", new LongCounter(triplet021uCount)); addAccumulator("021c", new LongCounter(triplet021cCount)); addAccumulator("111d", new LongCounter(triplet111dCount)); addAccumulator("111u", new LongCounter(triplet111uCount)); addAccumulator("201", new LongCounter(triplet201Count)); } } /** * Wraps triadic census metrics. */ public static class Result implements PrintableResult { private final BigInteger[] counts; public Result(BigInteger... counts) { Preconditions.checkArgument(counts.length == 16, "Expected 16 counts but received " + counts.length); this.counts = counts; } public Result(long... counts) { Preconditions.checkArgument(counts.length == 16, "Expected 16 counts but received " + counts.length); this.counts = new BigInteger[counts.length]; for (int i = 0; i < counts.length; i++) { this.counts[i] = BigInteger.valueOf(counts[i]); } } /** * Get the count of "003" triads which have zero connecting vertices. * * @return count of "003" triads */ public BigInteger getCount003() { return counts[0]; } /** * Get the count of "012" triads which have one unidirectional edge among the vertices. * * @return count of "012" triads */ public BigInteger getCount012() { return counts[1]; } /** * Get the count of "102" triads which have one bidirectional edge among the vertices. * * @return count of "102" triads */ public BigInteger getCount102() { return counts[2]; } /** * Get the count of "021d" triads which have two unidirectional edges among the vertices, * forming an open triplet; both edges source the center vertex. * * @return count of "021d" triads */ public BigInteger getCount021d() { return counts[3]; } /** * Get the count of "021u" triads which have two unidirectional edges among the vertices, * forming an open triplet; both edges target the center vertex. * * @return count of "021u" triads */ public BigInteger getCount021u() { return counts[4]; } /** * Get the count of "021c" triads which have two unidirectional edges among the vertices, * forming an open triplet; one edge sources and one edge targets the center vertex. * * @return count of "021c" triads */ public BigInteger getCount021c() { return counts[5]; } /** * Get the count of "111d" triads which have one unidirectional and one bidirectional edge * among the vertices, forming an open triplet; the unidirectional edge targets the center vertex. * * @return count of "111d" triads */ public BigInteger getCount111d() { return counts[6]; } /** * Get the count of "111u" triads which have one unidirectional and one bidirectional edge * among the vertices, forming an open triplet; the unidirectional edge sources the center vertex. * * @return count of "111u" triads */ public BigInteger getCount111u() { return counts[7]; } /** * Get the count of "030t" triads which have three unidirectional edges among the vertices, * forming a closed triplet, a triangle; two of the unidirectional edges source/target the * same vertex. * * @return count of "030t" triads */ public BigInteger getCount030t() { return counts[8]; } /** * Get the count of "030c" triads which have three unidirectional edges among the vertices, * forming a closed triplet, a triangle; the three unidirectional edges both source and target * different vertices. * * @return count of "030c" triads */ public BigInteger getCount030c() { return counts[9]; } /** * Get the count of "201" triads which have two unidirectional edges among the vertices, * forming an open triplet. * * @return count of "201" triads */ public BigInteger getCount201() { return counts[10]; } /** * Get the count of "120d" triads which have two unidirectional edges and one bidirectional edge * among the vertices, forming a closed triplet, a triangle; both unidirectional edges source * the same vertex. * * @return count of "120d" triads */ public BigInteger getCount120d() { return counts[11]; } /** * Get the count of "120u" triads which have two unidirectional and one bidirectional edges * among the vertices, forming a closed triplet, a triangle; both unidirectional edges target * the same vertex. * * @return count of "120u" triads */ public BigInteger getCount120u() { return counts[12]; } /** * Get the count of "120c" triads which have two unidirectional edges and one bidirectional edge * among the vertices, forming a closed triplet, a triangle; one vertex is sourced by and targeted * by the unidirectional edges. * * @return count of "120c" triads */ public BigInteger getCount120c() { return counts[13]; } /** * Get the count of "210" triads which have one unidirectional edge and two bidirectional edges * among the vertices, forming a closed triplet, a triangle. * * @return count of "210" triads */ public BigInteger getCount210() { return counts[14]; } /** * Get the count of "300" triads which have three bidirectional edges among the vertices, * forming a closed triplet, a triangle. * * @return count of "300" triads */ public BigInteger getCount300() { return counts[15]; } /** * Get the array of counts. * * The order of the counts is from least to most connected: * 003, 012, 102, 021d, 021u, 021c, 111d, 111u, * 030t, 030c, 201, 120d, 120u, 120c, 210, 300 * * @return array of counts */ public BigInteger[] getCounts() { return counts; } @Override public String toPrintableString() { NumberFormat nf = NumberFormat.getInstance(); return "003: " + nf.format(getCount003()) + "; 012: " + nf.format(getCount012()) + "; 102: " + nf.format(getCount102()) + "; 021d: " + nf.format(getCount021d()) + "; 021u: " + nf.format(getCount021u()) + "; 021c: " + nf.format(getCount021c()) + "; 111d: " + nf.format(getCount111d()) + "; 111u: " + nf.format(getCount111u()) + "; 030t: " + nf.format(getCount030t()) + "; 030c: " + nf.format(getCount030c()) + "; 201: " + nf.format(getCount201()) + "; 120d: " + nf.format(getCount120d()) + "; 120u: " + nf.format(getCount120u()) + "; 120c: " + nf.format(getCount120c()) + "; 210: " + nf.format(getCount210()) + "; 300: " + nf.format(getCount300()); } @Override public int hashCode() { return new HashCodeBuilder() .append(counts) .hashCode(); } @Override public boolean equals(Object obj) { if (obj == null) { return false; } if (obj == this) { return true; } if (obj.getClass() != getClass()) { return false; } Result rhs = (Result)obj; return new EqualsBuilder() .append(counts, rhs.counts) .isEquals(); } } }