/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.graph.library.clustering.directed;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.common.operators.base.JoinOperatorBase;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsSecond;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.EdgeOrder;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.asm.degree.annotate.directed.EdgeDegreesPair;
import org.apache.flink.graph.asm.degree.annotate.directed.VertexDegrees.Degrees;
import org.apache.flink.graph.asm.result.PrintableResult;
import org.apache.flink.graph.asm.result.TertiaryResult;
import org.apache.flink.graph.library.clustering.directed.TriangleListing.Result;
import org.apache.flink.graph.utils.proxy.GraphAlgorithmWrappingDataSet;
import org.apache.flink.graph.utils.proxy.OptionalBoolean;
import org.apache.flink.types.ByteValue;
import org.apache.flink.types.CopyableValue;
import org.apache.flink.util.Collector;
import org.apache.flink.util.Preconditions;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import static org.apache.flink.api.common.ExecutionConfig.PARALLELISM_DEFAULT;
/**
* Generates a listing of distinct triangles from the input graph.
* <p>
* A triangle is a 3-clique with vertices A, B, and C connected by edges
* (A, B), (A, C), and (B, C).
* <p>
* The input graph must not contain duplicate edges or self-loops.
* <p>
* This algorithm is similar to the undirected version but also tracks and
* computes a bitmask representing the six potential graph edges connecting
* the triangle vertices.
*
* @param <K> graph ID type
* @param <VV> vertex value type
* @param <EV> edge value type
*/
public class TriangleListing<K extends Comparable<K> & CopyableValue<K>, VV, EV>
extends GraphAlgorithmWrappingDataSet<K, VV, EV, Result<K>> {
// Optional configuration

// Whether runInternal appends the map step that reorders each triangle's
// vertex IDs. Changed through setSortTriangleVertices and combined with the
// setting of another instance in mergeConfiguration.
// NOTE(review): the meaning of the two OptionalBoolean constructor arguments
// is not visible in this file - confirm against OptionalBoolean.
private OptionalBoolean sortTriangleVertices = new OptionalBoolean(false, true);

// Parallelism passed to the ordering/flattening operators and to the degree
// annotation in runInternal. Stays at PARALLELISM_DEFAULT until overridden
// through setLittleParallelism.
private int littleParallelism = PARALLELISM_DEFAULT;
/**
 * Chooses whether emitted triangles are normalized so that the vertex IDs
 * of each result (K0, K1, K2) satisfy {@code K0 < K1 < K2}.
 *
 * @param sortTriangleVertices {@code true} to output each triangle's vertices in sorted order
 * @return this algorithm instance, to allow call chaining
 */
public TriangleListing<K, VV, EV> setSortTriangleVertices(boolean sortTriangleVertices) {
    final OptionalBoolean option = this.sortTriangleVertices;
    option.set(sortTriangleVertices);

    return this;
}
/**
 * Sets the parallelism of the operators that process small amounts of data.
 *
 * @param littleParallelism operator parallelism; accepted when it is positive
 *                          or equal to {@code PARALLELISM_DEFAULT}
 * @return this algorithm instance, to allow call chaining
 */
public TriangleListing<K, VV, EV> setLittleParallelism(int littleParallelism) {
    final boolean isDefault = littleParallelism == PARALLELISM_DEFAULT;
    final boolean isAcceptable = littleParallelism > 0 || isDefault;

    Preconditions.checkArgument(isAcceptable, "The parallelism must be greater than zero.");

    this.littleParallelism = littleParallelism;

    return this;
}
/**
 * Reports the fully-qualified class name of {@link TriangleListing} as the
 * algorithm name.
 *
 * @return the value of {@code TriangleListing.class.getName()}
 */
@Override
protected String getAlgorithmName() {
    final Class<?> algorithmClass = TriangleListing.class;

    return algorithmClass.getName();
}
/**
 * Folds the configuration of another algorithm instance into this one.
 * <p>
 * The vertex-sorting option is merged by delegating to
 * {@code OptionalBoolean.mergeWith}. For the little parallelism an explicitly
 * configured value takes precedence over {@code PARALLELISM_DEFAULT}; when
 * both instances carry an explicit value the smaller one is kept.
 *
 * @param other the algorithm whose configuration is merged in; must not be null
 * @return {@code false} when {@code other} is not a {@code TriangleListing},
 *         otherwise {@code true} after the merge
 */
@Override
protected boolean mergeConfiguration(GraphAlgorithmWrappingDataSet other) {
    Preconditions.checkNotNull(other);

    // other is known to be non-null here, so instanceof matches the class check
    if (!(other instanceof TriangleListing)) {
        return false;
    }

    TriangleListing rhs = (TriangleListing) other;

    sortTriangleVertices.mergeWith(rhs.sortTriangleVertices);

    if (littleParallelism == PARALLELISM_DEFAULT) {
        // nothing configured locally: adopt whatever the other side has
        littleParallelism = rhs.littleParallelism;
    } else if (rhs.littleParallelism != PARALLELISM_DEFAULT) {
        // both sides configured: keep the smaller value
        littleParallelism = Math.min(littleParallelism, rhs.littleParallelism);
    }

    return true;
}
/*
* Implementation notes:
*
* The requirement that "K extends CopyableValue<K>" can be removed when
* Flink has a self-join and GenerateTriplets is implemented as such.
*
* ProjectTriangles should eventually be replaced by ".projectFirst("*")"
* when projections use code generation.
*/
/**
 * Assembles the dataflow that lists the triangles of the input graph.
 * <p>
 * The edges are normalized twice, once ordered by vertex ID and once ordered
 * by vertex degree, each time reducing the records of a vertex pair to a
 * single bitmask. Triplets are generated from the degree-ordered edges and
 * joined against the ID-ordered edges on the two non-shared triplet vertices
 * to close the triangle. The configured little parallelism is applied to the
 * ordering and flattening operators and to the degree annotation only.
 *
 * @param input the graph to process
 * @return the triangles, with vertices sorted when that option is enabled
 * @throws Exception declared by the overridden method
 */
@Override
public DataSet<Result<K>> runInternal(Graph<K, VV, EV> input)
        throws Exception {
    final int smallParallelism = littleParallelism;

    final OrderByID<K, EV> orderById = new OrderByID<>();
    final ReduceBitmask<K> flattenById = new ReduceBitmask<>();

    // u, v, bitmask where u < v
    DataSet<Tuple3<K, K, ByteValue>> edgesById = input
        .getEdges()
        .map(orderById)
            .setParallelism(smallParallelism)
            .name("Order by ID")
        .groupBy(0, 1)
        .reduceGroup(flattenById)
            .setParallelism(smallParallelism)
            .name("Flatten by ID");

    // u, v, (deg(u), deg(v))
    DataSet<Edge<K, Tuple3<EV, Degrees, Degrees>>> edgesWithDegrees = input
        .run(new EdgeDegreesPair<K, VV, EV>()
            .setParallelism(smallParallelism));

    final OrderByDegree<K, EV> orderByDegree = new OrderByDegree<>();
    final ReduceBitmask<K> flattenByDegree = new ReduceBitmask<>();

    // u, v, bitmask where deg(u) < deg(v) or (deg(u) == deg(v) and u < v)
    DataSet<Tuple3<K, K, ByteValue>> edgesByDegree = edgesWithDegrees
        .map(orderByDegree)
            .setParallelism(smallParallelism)
            .name("Order by degree")
        .groupBy(0, 1)
        .reduceGroup(flattenByDegree)
            .setParallelism(smallParallelism)
            .name("Flatten by degree");

    final GenerateTriplets<K> tripletGenerator = new GenerateTriplets<>();

    // u, v, w, bitmask where (u, v) and (u, w) are edges in graph
    DataSet<Tuple4<K, K, K, ByteValue>> openTriplets = edgesByDegree
        .groupBy(0)
        .sortGroup(1, Order.ASCENDING)
        .reduceGroup(tripletGenerator)
            .name("Generate triplets");

    final ProjectTriangles<K> projection = new ProjectTriangles<>();

    // u, v, w, bitmask where (u, v), (u, w), and (v, w) are edges in graph
    DataSet<Result<K>> triangles = openTriplets
        .join(edgesById, JoinOperatorBase.JoinHint.REPARTITION_HASH_SECOND)
        .where(1, 2)
        .equalTo(0, 1)
        .with(projection)
            .name("Triangle listing");

    if (!sortTriangleVertices.get()) {
        return triangles;
    }

    return triangles
        .map(new SortTriangleVertices<K>())
            .name("Sort triangle vertices");
}
/**
 * Drops the edge value and emits a Tuple3 whose f0 and f1 hold the lesser
 * and the greater of the source and target IDs, in that order. The third
 * field is a bitmask recording the vertex order: the forward bitmask when
 * the source ID compares less than the target ID, the reverse bitmask
 * otherwise.
 * <p>
 * The same output tuple instance is returned from every call.
 *
 * @param <T> ID type
 * @param <ET> edge value type
 */
private static final class OrderByID<T extends Comparable<T>, ET>
implements MapFunction<Edge<T, ET>, Tuple3<T, T, ByteValue>> {
    private ByteValue forward = new ByteValue(EdgeOrder.FORWARD.getBitmask());

    private ByteValue reverse = new ByteValue(EdgeOrder.REVERSE.getBitmask());

    private Tuple3<T, T, ByteValue> output = new Tuple3<>();

    @Override
    public Tuple3<T, T, ByteValue> map(Edge<T, ET> value)
            throws Exception {
        final boolean sourceIsLesser = value.f0.compareTo(value.f1) < 0;

        output.f0 = sourceIsLesser ? value.f0 : value.f1;
        output.f1 = sourceIsLesser ? value.f1 : value.f0;
        output.f2 = sourceIsLesser ? forward : reverse;

        return output;
    }
}
/**
 * Collapses the bitmasks of a group into a single value using bitwise-or.
 * <p>
 * The last record read from the group is reused as the output record: its
 * bitmask field is overwritten with the combined value before it is
 * collected.
 * NOTE(review): an empty group would dereference {@code null}; this relies
 * on the caller never passing an empty group - confirm.
 *
 * @param <T> ID type
 */
@ForwardedFields("0; 1")
private static final class ReduceBitmask<T>
implements GroupReduceFunction<Tuple3<T, T, ByteValue>, Tuple3<T, T, ByteValue>> {
    @Override
    public void reduce(Iterable<Tuple3<T, T, ByteValue>> values, Collector<Tuple3<T, T, ByteValue>> out)
            throws Exception {
        byte combined = 0;
        Tuple3<T, T, ByteValue> last = null;

        Iterator<Tuple3<T, T, ByteValue>> records = values.iterator();
        while (records.hasNext()) {
            last = records.next();
            combined |= last.f2.getValue();
        }

        last.f2.setValue(combined);
        out.collect(last);
    }
}
/**
 * Drops the edge value and emits a Tuple3 whose f0 and f1 hold the source
 * and target IDs ordered by degree count, lesser degree first. When both
 * degrees are equal the IDs themselves are compared and emitted in order.
 * The third field is a bitmask recording the vertex order; it is the
 * forward or reverse bitmask shifted left by two bits.
 * <p>
 * The same output tuple instance is returned from every call.
 *
 * @param <T> ID type
 * @param <ET> edge value type
 */
private static final class OrderByDegree<T extends Comparable<T>, ET>
implements MapFunction<Edge<T, Tuple3<ET, Degrees, Degrees>>, Tuple3<T, T, ByteValue>> {
    private ByteValue forward = new ByteValue((byte)(EdgeOrder.FORWARD.getBitmask() << 2));

    private ByteValue reverse = new ByteValue((byte)(EdgeOrder.REVERSE.getBitmask() << 2));

    private Tuple3<T, T, ByteValue> output = new Tuple3<>();

    @Override
    public Tuple3<T, T, ByteValue> map(Edge<T, Tuple3<ET, Degrees, Degrees>> value)
            throws Exception {
        final long sourceDegree = value.f2.f1.getDegree().getValue();
        final long targetDegree = value.f2.f2.getDegree().getValue();

        // IDs are only compared to break a tie between equal degrees
        final boolean sourceFirst = sourceDegree < targetDegree
            || (sourceDegree == targetDegree && value.f0.compareTo(value.f1) < 0);

        return sourceFirst
            ? fill(value.f0, value.f1, forward)
            : fill(value.f1, value.f0, reverse);
    }

    /** Writes the three fields into the reused output tuple and returns it. */
    private Tuple3<T, T, ByteValue> fill(T first, T second, ByteValue order) {
        output.f0 = first;
        output.f1 = second;
        output.f2 = order;
        return output;
    }
}
/**
* Generates the set of triplets by the pairwise enumeration of the open
* neighborhood for each vertex. The number of triplets is quadratic in
* the vertex degree; however, data skew is minimized by only generating
* triplets from the vertex with least degree.
* <p>
* For a group of input records (u, x, bitmask), every record is paired with
* each record that preceded it in the iteration, emitting
* (u, earlier neighbor, current neighbor, combined bitmask). The combined
* bitmask is the bitwise-or of the current record's bitmask and the earlier
* record's bitmask shifted left by two bits.
* <p>
* The emitted Tuple4 and the entries of the visited list are instance fields
* that are overwritten and reused between records and between calls.
*
* @param <T> ID type
*/
@ForwardedFields("0")
private static final class GenerateTriplets<T extends CopyableValue<T>>
implements GroupReduceFunction<Tuple3<T, T, ByteValue>, Tuple4<T, T, T, ByteValue>> {
// reused output record; only f3's ByteValue is allocated up front
private Tuple4<T, T, T, ByteValue> output = new Tuple4<>(null, null, null, new ByteValue());
// neighbors already seen in the current group, paired with their shifted
// bitmask; the list is never cleared, its slots are recycled across calls
private List<Tuple2<T, ByteValue>> visited = new ArrayList<>();
@Override
public void reduce(Iterable<Tuple3<T, T, ByteValue>> values, Collector<Tuple4<T, T, T, ByteValue>> out)
throws Exception {
// number of leading entries of visited that belong to the current group
int visitedCount = 0;
Iterator<Tuple3<T, T, ByteValue>> iter = values.iterator();
while (true) {
// NOTE(review): next() is called without a preceding hasNext() on the
// first pass - this assumes the group is never empty; confirm
Tuple3<T, T, ByteValue> edge = iter.next();
byte bitmask = edge.f2.getValue();
output.f0 = edge.f0;
output.f2 = edge.f1;
// pair the current neighbor with every neighbor seen earlier in this group
for (int i = 0; i < visitedCount; i++) {
Tuple2<T, ByteValue> previous = visited.get(i);
output.f1 = previous.f0;
output.f3.setValue((byte)(previous.f1.getValue() | bitmask));
// u, v, w, bitmask
out.collect(output);
}
// the final record is not remembered: nothing follows to pair it with
if (! iter.hasNext()) {
break;
}
// stored two bits higher so that it does not overlap the unshifted
// bitmask of a later record when the two are or-ed together
byte shiftedBitmask = (byte)(bitmask << 2);
if (visitedCount == visited.size()) {
// no recyclable slot left: allocate a new entry holding a copy of the ID
// (a copy rather than the reference - presumably because the input
// objects may be reused by the caller; confirm)
visited.add(new Tuple2<>(edge.f1.copy(), new ByteValue(shiftedBitmask)));
} else {
// recycle the slot allocated by an earlier record or call
Tuple2<T, ByteValue> update = visited.get(visitedCount);
edge.f1.copyTo(update.f0);
update.f1.setValue(shiftedBitmask);
}
visitedCount += 1;
}
}
}
/**
 * Projects a joined (triplet, edge) pair as a triangle while collapsing the
 * triplet and edge bitmasks with a bitwise-or.
 * <p>
 * The three vertex IDs of the output are taken from the triplet. The join
 * in {@code runInternal} matches triplet fields (1, 2) against edge fields
 * (0, 1), so the edge's fields 0 and 1 are equal to output fields 1 and 2
 * respectively; the forwarded-field annotation for the second input declares
 * exactly that mapping. Declaring them as forwarded to positions 0 and 1
 * would be wrong because output field 0 comes from the triplet's field 0,
 * which is not a join key.
 * <p>
 * The same output instance is returned from every call.
 *
 * @param <T> ID type
 */
@ForwardedFieldsFirst("0; 1; 2")
@ForwardedFieldsSecond("0->1; 1->2")
private static final class ProjectTriangles<T>
implements JoinFunction<Tuple4<T, T, T, ByteValue>, Tuple3<T, T, ByteValue>, Result<T>> {
    private Result<T> output = new Result<>();

    /**
     * Builds the triangle for one joined pair.
     *
     * @param triplet (u, v, w, bitmask) where (u, v) and (u, w) are edges
     * @param edge (v, w, bitmask) closing the triangle
     * @return the reused result holding (u, v, w) and the or-ed bitmask
     */
    @Override
    public Result<T> join(Tuple4<T, T, T, ByteValue> triplet, Tuple3<T, T, ByteValue> edge)
            throws Exception {
        output.f0 = triplet.f0;
        output.f1 = triplet.f1;
        output.f2 = triplet.f2;
        output.f3.setValue((byte)(triplet.f3.getValue() | edge.f2.getValue()));
        return output;
    }
}
/**
 * Reorders the vertices of each emitted triangle (K0, K1, K2, bitmask)
 * into sorted order such that {@code K0 < K1 < K2}, rearranging the bitmask
 * to match.
 * <p>
 * The bitmask holds three 2-bit pairs: bits 5-4 describe the (f0, f1) edge,
 * bits 3-2 the (f0, f2) edge and bits 1-0 the (f1, f2) edge. When two
 * vertices trade places the two bits of their pair are swapped. The input
 * record is modified in place and returned.
 *
 * @param <T> ID type
 */
private static final class SortTriangleVertices<T extends Comparable<T>>
implements MapFunction<Result<T>, Result<T>> {
    @Override
    public Result<T> map(Result<T> value)
            throws Exception {
        // by the triangle listing algorithm we know f1 < f2
        if (value.f0.compareTo(value.f1) <= 0) {
            return value;
        }

        final byte bitmask = value.f3.getValue();

        // 2-bit direction pairs of the triangle as it arrived
        final int old01 = (bitmask >>> 4) & 0b11;
        final int old02 = (bitmask >>> 2) & 0b11;
        final int old12 = bitmask & 0b11;

        final T displaced = value.f0;
        value.f0 = value.f1;

        final int reordered;

        if (displaced.compareTo(value.f2) < 0) {
            // (a, b, c) with b < a < c becomes (b, a, c)
            value.f1 = displaced;

            reordered = (reversePair(old01) << 4) | (old12 << 2) | old02;
        } else {
            // (a, b, c) with b < c < a becomes (b, c, a)
            value.f1 = value.f2;
            value.f2 = displaced;

            reordered = (old12 << 4) | (reversePair(old01) << 2) | reversePair(old02);
        }

        value.f3.setValue((byte) reordered);

        return value;
    }

    /** Swaps the two bits of a 2-bit direction pair. */
    private static int reversePair(int pair) {
        return ((pair & 0b10) >>> 1) | ((pair & 0b01) << 1);
    }
}
/**
 * Wraps {@link Tuple4} to encapsulate results from the directed Triangle Listing algorithm.
 * <p>
 * Fields f0, f1, and f2 hold the three vertex IDs and f3 holds the edge
 * bitmask. Bits 5-4 of the bitmask are read as the edge between the first
 * and second vertex, bits 3-2 as the edge between the first and third
 * vertex, and bits 1-0 as the edge between the second and third vertex.
 *
 * @param <T> ID type
 */
public static class Result<T>
extends Tuple4<T, T, T, ByteValue>
implements PrintableResult, TertiaryResult<T> {
    /**
     * No-args constructor. Allocates the bitmask value so that it can be
     * updated in place.
     */
    public Result() {
        f3 = new ByteValue();
    }

    @Override
    public T getVertexId0() {
        return f0;
    }

    @Override
    public void setVertexId0(T value) {
        f0 = value;
    }

    @Override
    public T getVertexId1() {
        return f1;
    }

    @Override
    public void setVertexId1(T value) {
        f1 = value;
    }

    @Override
    public T getVertexId2() {
        return f2;
    }

    @Override
    public void setVertexId2(T value) {
        f2 = value;
    }

    /**
     * Get the bitmask indicating the presence of the six potential
     * connecting edges.
     *
     * @return the edge bitmask
     *
     * @see EdgeOrder
     */
    public ByteValue getBitmask() {
        return f3;
    }

    /**
     * Format values into a human-readable string listing the three vertex
     * IDs followed by the direction of each of the three connecting edges.
     *
     * @return verbose string
     * @throws IllegalArgumentException if any of the three 2-bit edge
     *         fields of the bitmask matches none of the forward, reverse,
     *         or mutual bitmasks
     */
    public String toPrintableString() {
        byte bitmask = f3.getValue();

        return "1st vertex ID: " + getVertexId0()
            + ", 2nd vertex ID: " + getVertexId1()
            + ", 3rd vertex ID: " + getVertexId2()
            + ", edge directions: " + getVertexId0() + maskToString(bitmask, 4) + getVertexId1()
            + ", " + getVertexId0() + maskToString(bitmask, 2) + getVertexId2()
            + ", " + getVertexId1() + maskToString(bitmask, 0) + getVertexId2();
    }

    /**
     * Renders the 2-bit edge field found {@code shift} bits into the mask
     * as an arrow.
     *
     * @param mask the full triangle bitmask
     * @param shift bit offset of the edge field within the mask
     * @return {@code "->"}, {@code "<-"}, or {@code "<->"}
     * @throws IllegalArgumentException if the field matches no edge order
     */
    private static String maskToString(byte mask, int shift) {
        // only the two lowest bits remain after masking, so the sign
        // extension of the byte does not leak into the result
        int edgeMask = (mask >>> shift) & 0b000011;

        if (edgeMask == EdgeOrder.FORWARD.getBitmask()) {
            return "->";
        } else if (edgeMask == EdgeOrder.REVERSE.getBitmask()) {
            return "<-";
        } else if (edgeMask == EdgeOrder.MUTUAL.getBitmask()) {
            return "<->";
        } else {
            throw new IllegalArgumentException("Bitmask is missing an edge (mask = "
                + mask + ", shift = " + shift + ")");
        }
    }
}
}