/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.graph.library.metric.directed;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.accumulators.LongMaximum;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.graph.AbstractGraphAnalytic;
import org.apache.flink.graph.AnalyticHelper;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.asm.degree.annotate.directed.VertexDegrees;
import org.apache.flink.graph.asm.degree.annotate.directed.VertexDegrees.Degrees;
import org.apache.flink.graph.asm.result.PrintableResult;
import org.apache.flink.graph.library.metric.directed.VertexMetrics.Result;
import java.io.IOException;
import java.text.NumberFormat;
import static org.apache.flink.api.common.ExecutionConfig.PARALLELISM_DEFAULT;
/**
* Compute the following vertex metrics in a directed graph:
* - number of vertices
* - number of edges
* - number of unidirectional edges
* - number of bidirectional edges
* - average degree
* - density
* - number of triplets
* - maximum degree
* - maximum out degree
* - maximum in degree
* - maximum number of triplets
*
* @param <K> graph ID type
* @param <VV> vertex value type
* @param <EV> edge value type
*/
public class VertexMetrics<K extends Comparable<K>, VV, EV>
extends AbstractGraphAnalytic<K, VV, EV, Result> {
private static final String VERTEX_COUNT = "vertexCount";
private static final String UNIDIRECTIONAL_EDGE_COUNT = "unidirectionalEdgeCount";
private static final String BIDIRECTIONAL_EDGE_COUNT = "bidirectionalEdgeCount";
private static final String TRIPLET_COUNT = "tripletCount";
private static final String MAXIMUM_DEGREE = "maximumDegree";
private static final String MAXIMUM_OUT_DEGREE = "maximumOutDegree";
private static final String MAXIMUM_IN_DEGREE = "maximumInDegree";
private static final String MAXIMUM_TRIPLETS = "maximumTriplets";
private VertexMetricsHelper<K> vertexMetricsHelper;
// Optional configuration
private boolean includeZeroDegreeVertices = false;
private int parallelism = PARALLELISM_DEFAULT;
/**
 * Choose whether vertices without any edges take part in the metrics.
 *
 * <p>Degrees are normally derived from the edge set alone. Enabling this
 * option adds a join against the vertex set so that vertices with a degree
 * of zero are also emitted. The option is disabled by default and is
 * passed on to the degree annotation when the analytic is run.
 *
 * @param includeZeroDegreeVertices whether to output vertices with a
 *                                  degree of zero
 * @return this
 */
public VertexMetrics<K, VV, EV> setIncludeZeroDegreeVertices(boolean includeZeroDegreeVertices) {
    this.includeZeroDegreeVertices = includeZeroDegreeVertices;

    return this;
}
/**
 * Set the parallelism used by the operators of this analytic in place of
 * the initial value of {@code PARALLELISM_DEFAULT}. The value is passed on
 * to the degree annotation when the analytic is run.
 *
 * @param parallelism operator parallelism
 * @return this
 */
public VertexMetrics<K, VV, EV> setParallelism(int parallelism) {
    this.parallelism = parallelism;

    return this;
}
/**
 * Attach the metrics collection to the input graph: the graph's vertices
 * are annotated with their degrees and the annotated vertices are written
 * to a helper sink which is later queried by {@code getResult()}.
 *
 * @param input the directed graph to analyze
 * @return this
 * @throws Exception propagated from the parent analytic or from the degree
 *                   annotation
 */
@Override
public VertexMetrics<K, VV, EV> run(Graph<K, VV, EV> input)
        throws Exception {
    super.run(input);

    // the optional configuration of this analytic is handed through to the
    // degree annotation
    DataSet<Vertex<K, Degrees>> annotatedVertices = input.run(
        new VertexDegrees<K, VV, EV>()
            .setIncludeZeroDegreeVertices(includeZeroDegreeVertices)
            .setParallelism(parallelism));

    // keep a reference to the sink so that its accumulators can be read
    // when the result is requested
    VertexMetricsHelper<K> helper = new VertexMetricsHelper<>();
    vertexMetricsHelper = helper;

    annotatedVertices
        .output(helper)
        .name("Vertex metrics");

    return this;
}
/**
 * Read the accumulators published by the helper sink and assemble them
 * into a {@link Result}.
 *
 * @return the collected vertex metrics
 */
@Override
public Result getResult() {
    VertexMetricsHelper<K> helper = vertexMetricsHelper;

    long vertices = helper.getAccumulator(env, VERTEX_COUNT);
    long unidirectionalEndpoints = helper.getAccumulator(env, UNIDIRECTIONAL_EDGE_COUNT);
    long bidirectionalEndpoints = helper.getAccumulator(env, BIDIRECTIONAL_EDGE_COUNT);
    long triplets = helper.getAccumulator(env, TRIPLET_COUNT);
    long maxDegree = helper.getAccumulator(env, MAXIMUM_DEGREE);
    long maxOutDegree = helper.getAccumulator(env, MAXIMUM_OUT_DEGREE);
    long maxInDegree = helper.getAccumulator(env, MAXIMUM_IN_DEGREE);
    long maxTriplets = helper.getAccumulator(env, MAXIMUM_TRIPLETS);

    // the edge accumulators are summed per vertex, so every edge has been
    // counted at both of its endpoints and the totals must be halved
    long unidirectionalEdges = unidirectionalEndpoints / 2;
    long bidirectionalEdges = bidirectionalEndpoints / 2;

    return new Result(
        vertices,
        unidirectionalEdges,
        bidirectionalEdges,
        triplets,
        maxDegree,
        maxOutDegree,
        maxInDegree,
        maxTriplets);
}
/**
 * Output sink which folds the degree annotation of each vertex into running
 * totals and maxima, then publishes them as accumulators when closed.
 *
 * @param <T> ID type
 */
private static class VertexMetricsHelper<T>
        extends AnalyticHelper<Vertex<T, Degrees>> {
    // running sums
    private long vertexCount;
    private long unidirectionalEdgeCount;
    private long bidirectionalEdgeCount;
    private long tripletCount;

    // running maxima
    private long maximumDegree;
    private long maximumOutDegree;
    private long maximumInDegree;
    private long maximumTriplets;

    /**
     * Fold the degrees of a single vertex into the running metrics.
     *
     * @param record vertex annotated with its degree, out-degree, and in-degree
     * @throws IOException declared by the overridden method; not thrown here
     */
    @Override
    public void writeRecord(Vertex<T, Degrees> record) throws IOException {
        Degrees annotation = record.f1;

        long neighbors = annotation.getDegree().getValue();
        long outgoing = annotation.getOutDegree().getValue();
        long incoming = annotation.getInDegree().getValue();

        // NOTE(review): presumes the degree counts a neighbor once even when
        // edges exist in both directions, so the surplus of out- plus
        // in-degree over degree is the number of reciprocated edges
        long reciprocated = outgoing + incoming - neighbors;
        long oneWay = neighbors - reciprocated;

        // number of unordered pairs which can be chosen from "neighbors"
        long centeredTriplets = neighbors * (neighbors - 1) / 2;

        vertexCount += 1;
        unidirectionalEdgeCount += oneWay;
        bidirectionalEdgeCount += reciprocated;
        tripletCount += centeredTriplets;

        if (neighbors > maximumDegree) {
            maximumDegree = neighbors;
        }
        if (outgoing > maximumOutDegree) {
            maximumOutDegree = outgoing;
        }
        if (incoming > maximumInDegree) {
            maximumInDegree = incoming;
        }
        if (centeredTriplets > maximumTriplets) {
            maximumTriplets = centeredTriplets;
        }
    }

    /**
     * Publish the collected totals as counters and the collected maxima as
     * maximum accumulators.
     *
     * @throws IOException declared by the overridden method
     */
    @Override
    public void close() throws IOException {
        // sums
        addAccumulator(VERTEX_COUNT, new LongCounter(vertexCount));
        addAccumulator(UNIDIRECTIONAL_EDGE_COUNT, new LongCounter(unidirectionalEdgeCount));
        addAccumulator(BIDIRECTIONAL_EDGE_COUNT, new LongCounter(bidirectionalEdgeCount));
        addAccumulator(TRIPLET_COUNT, new LongCounter(tripletCount));

        // maxima
        addAccumulator(MAXIMUM_DEGREE, new LongMaximum(maximumDegree));
        addAccumulator(MAXIMUM_OUT_DEGREE, new LongMaximum(maximumOutDegree));
        addAccumulator(MAXIMUM_IN_DEGREE, new LongMaximum(maximumInDegree));
        addAccumulator(MAXIMUM_TRIPLETS, new LongMaximum(maximumTriplets));
    }
}
/**
 * Immutable holder for the vertex metrics of a directed graph.
 *
 * <p>Edge counts are stored as the number of unidirectional edges and the
 * number of bidirectional edges; the total number of edges, the average
 * degree, and the density are derived from the stored values on request.
 */
public static class Result
        implements PrintableResult {
    private final long vertexCount;
    private final long unidirectionalEdgeCount;
    private final long bidirectionalEdgeCount;
    private final long tripletCount;
    private final long maximumDegree;
    private final long maximumOutDegree;
    private final long maximumInDegree;
    private final long maximumTriplets;

    /**
     * Create a result from the collected metrics.
     *
     * @param vertexCount number of vertices
     * @param unidirectionalEdgeCount number of unidirectional edges
     * @param bidirectionalEdgeCount number of bidirectional edges
     * @param tripletCount number of triplets
     * @param maximumDegree maximum degree
     * @param maximumOutDegree maximum out degree
     * @param maximumInDegree maximum in degree
     * @param maximumTriplets maximum triplets
     */
    public Result(long vertexCount, long unidirectionalEdgeCount, long bidirectionalEdgeCount, long tripletCount,
            long maximumDegree, long maximumOutDegree, long maximumInDegree, long maximumTriplets) {
        this.vertexCount = vertexCount;
        this.unidirectionalEdgeCount = unidirectionalEdgeCount;
        this.bidirectionalEdgeCount = bidirectionalEdgeCount;
        this.tripletCount = tripletCount;
        this.maximumDegree = maximumDegree;
        this.maximumOutDegree = maximumOutDegree;
        this.maximumInDegree = maximumInDegree;
        this.maximumTriplets = maximumTriplets;
    }

    /**
     * Get the number of vertices.
     *
     * @return number of vertices
     */
    public long getNumberOfVertices() {
        return vertexCount;
    }

    /**
     * Get the number of edges. A bidirectional edge is counted twice, once
     * for each direction.
     *
     * @return number of edges
     */
    public long getNumberOfEdges() {
        return unidirectionalEdgeCount + 2 * bidirectionalEdgeCount;
    }

    /**
     * Get the number of unidirectional edges.
     *
     * @return number of unidirectional edges
     */
    public long getNumberOfDirectedEdges() {
        return unidirectionalEdgeCount;
    }

    /**
     * Get the number of bidirectional edges.
     *
     * @return number of bidirectional edges
     */
    public long getNumberOfUndirectedEdges() {
        return bidirectionalEdgeCount;
    }

    /**
     * Get the average degree, computed as the number of edges divided by
     * the number of vertices.
     *
     * <p>A result of {@code Double.NaN} is returned when the number of
     * vertices is zero.
     *
     * @return average degree
     */
    public double getAverageDegree() {
        if (vertexCount == 0) {
            return Double.NaN;
        }

        return getNumberOfEdges() / (double) vertexCount;
    }

    /**
     * Get the density, the ratio of actual to potential edges between vertices.
     * The number of potential edges is {@code n * (n - 1)} for {@code n}
     * vertices.
     *
     * <p>A result of {@code Double.NaN} is returned for a graph with fewer
     * than two vertices.
     *
     * @return density
     */
    public double getDensity() {
        if (vertexCount <= 1) {
            return Double.NaN;
        }

        // multiply as doubles: the long product n * (n - 1) silently wraps
        // around once n exceeds roughly three billion vertices
        double potentialEdges = (double) vertexCount * (vertexCount - 1);

        return getNumberOfEdges() / potentialEdges;
    }

    /**
     * Get the number of triplets.
     *
     * @return number of triplets
     */
    public long getNumberOfTriplets() {
        return tripletCount;
    }

    /**
     * Get the maximum degree.
     *
     * @return maximum degree
     */
    public long getMaximumDegree() {
        return maximumDegree;
    }

    /**
     * Get the maximum out degree.
     *
     * @return maximum out degree
     */
    public long getMaximumOutDegree() {
        return maximumOutDegree;
    }

    /**
     * Get the maximum in degree.
     *
     * @return maximum in degree
     */
    public long getMaximumInDegree() {
        return maximumInDegree;
    }

    /**
     * Get the maximum triplets.
     *
     * @return maximum triplets
     */
    public long getMaximumTriplets() {
        return maximumTriplets;
    }

    /**
     * Render all metrics on a single line as "name: value" pairs separated
     * by semicolons, formatted with the default {@link NumberFormat}.
     *
     * @return human-readable summary of the metrics
     */
    @Override
    public String toPrintableString() {
        NumberFormat nf = NumberFormat.getInstance();

        // format for very small fractional numbers
        NumberFormat ff = NumberFormat.getInstance();
        ff.setMaximumFractionDigits(8);

        return "vertex count: " + nf.format(vertexCount)
            + "; edge count: " + nf.format(getNumberOfEdges())
            + "; unidirectional edge count: " + nf.format(unidirectionalEdgeCount)
            + "; bidirectional edge count: " + nf.format(bidirectionalEdgeCount)
            + "; average degree: " + nf.format(getAverageDegree())
            + "; density: " + ff.format(getDensity())
            + "; triplet count: " + nf.format(tripletCount)
            + "; maximum degree: " + nf.format(maximumDegree)
            + "; maximum out degree: " + nf.format(maximumOutDegree)
            + "; maximum in degree: " + nf.format(maximumInDegree)
            + "; maximum triplets: " + nf.format(maximumTriplets);
    }

    @Override
    public int hashCode() {
        return new HashCodeBuilder()
            .append(vertexCount)
            .append(unidirectionalEdgeCount)
            .append(bidirectionalEdgeCount)
            .append(tripletCount)
            .append(maximumDegree)
            .append(maximumOutDegree)
            .append(maximumInDegree)
            .append(maximumTriplets)
            .hashCode();
    }

    /**
     * Two results are equal when they are of the same class and all stored
     * metrics match.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == null) { return false; }
        if (obj == this) { return true; }
        if (obj.getClass() != getClass()) { return false; }

        Result rhs = (Result) obj;

        return new EqualsBuilder()
            .append(vertexCount, rhs.vertexCount)
            .append(unidirectionalEdgeCount, rhs.unidirectionalEdgeCount)
            .append(bidirectionalEdgeCount, rhs.bidirectionalEdgeCount)
            .append(tripletCount, rhs.tripletCount)
            .append(maximumDegree, rhs.maximumDegree)
            .append(maximumOutDegree, rhs.maximumOutDegree)
            .append(maximumInDegree, rhs.maximumInDegree)
            .append(maximumTriplets, rhs.maximumTriplets)
            .isEquals();
    }
}
}