/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.graph.asm.degree.filter.undirected;
import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.asm.degree.annotate.undirected.VertexDegree;
import org.apache.flink.graph.utils.proxy.GraphAlgorithmWrappingGraph;
import org.apache.flink.graph.utils.proxy.OptionalBoolean;
import org.apache.flink.types.LongValue;
import org.apache.flink.util.Collector;
import org.apache.flink.util.Preconditions;
import static org.apache.flink.api.common.ExecutionConfig.PARALLELISM_DEFAULT;
/**
* Removes vertices from a graph with degree greater than the given maximum.
* Any edge with a source or target vertex with degree greater than the
* given maximum is also removed.
*
* @param <K> ID type
* @param <VV> vertex value type
* @param <EV> edge value type
*/
public class MaximumDegree<K, VV, EV>
extends GraphAlgorithmWrappingGraph<K, VV, EV, K, VV, EV> {

	// Required configuration

	/** Vertices whose degree is strictly greater than this value are removed. */
	private final long maximumDegree;

	// Optional configuration

	/** Whether degrees are counted on edge target IDs instead of source IDs. */
	private final OptionalBoolean reduceOnTargetId = new OptionalBoolean(false, false);

	/** Whether the high-degree vertex set is broadcast when joining. */
	private final OptionalBoolean broadcastHighDegreeVertices = new OptionalBoolean(false, false);

	/** Parallelism applied to every operator created by this algorithm. */
	private int parallelism = PARALLELISM_DEFAULT;

	/**
	 * Filter out vertices with degree greater than the given maximum.
	 *
	 * @param maximumDegree maximum degree; must be greater than zero
	 */
	public MaximumDegree(long maximumDegree) {
		Preconditions.checkArgument(maximumDegree > 0, "Maximum degree must be greater than zero");

		this.maximumDegree = maximumDegree;
	}

	/**
	 * The degree can be counted from either the edge source or target IDs.
	 * By default the source IDs are counted. Reducing on target IDs may
	 * optimize the algorithm if the input edge list is sorted by target ID.
	 *
	 * @param reduceOnTargetId set to {@code true} if the input edge list
	 *                         is sorted by target ID
	 * @return this
	 */
	public MaximumDegree<K, VV, EV> setReduceOnTargetId(boolean reduceOnTargetId) {
		this.reduceOnTargetId.set(reduceOnTargetId);

		return this;
	}

	/**
	 * After filtering high-degree vertices this algorithm must perform joins
	 * on the original graph's vertex set and on both the source and target IDs
	 * of the edge set. These joins can be performed without shuffling data
	 * over the network if the high-degree vertices are distributed by a
	 * broadcast-hash.
	 *
	 * @param broadcastHighDegreeVertices set to {@code true} if the high-degree
	 *                                    vertices should be broadcast when joining
	 * @return this
	 */
	public MaximumDegree<K, VV, EV> setBroadcastHighDegreeVertices(boolean broadcastHighDegreeVertices) {
		this.broadcastHighDegreeVertices.set(broadcastHighDegreeVertices);

		return this;
	}

	/**
	 * Override the operator parallelism.
	 *
	 * @param parallelism operator parallelism
	 * @return this
	 */
	public MaximumDegree<K, VV, EV> setParallelism(int parallelism) {
		this.parallelism = parallelism;

		return this;
	}

	@Override
	protected String getAlgorithmName() {
		return MaximumDegree.class.getName();
	}

	/**
	 * Attempts to fold the configuration of another algorithm instance into
	 * this one.
	 *
	 * <p>The merge is refused when {@code other} is not a {@code MaximumDegree}
	 * or when its maximum degree differs from this instance's. Otherwise the
	 * two optional boolean settings are merged and the parallelism is
	 * resolved: an explicitly set value wins over the default, and when both
	 * instances set a value the smaller one is kept.
	 *
	 * @param other the algorithm whose configuration should be merged into
	 *              this one; must not be {@code null}
	 * @return {@code true} if the configurations were merged, {@code false}
	 *         if they are incompatible
	 */
	@Override
	protected boolean mergeConfiguration(GraphAlgorithmWrappingGraph other) {
		// NOTE(review): the parameter is intentionally left raw so that the
		// method signature stays exactly as it was; presumably it mirrors the
		// superclass declaration, which is not visible from this file.
		Preconditions.checkNotNull(other);

		if (!(other instanceof MaximumDegree)) {
			return false;
		}

		MaximumDegree<?, ?, ?> rhs = (MaximumDegree<?, ?, ?>) other;

		// verify that configurations can be merged
		if (maximumDegree != rhs.maximumDegree) {
			return false;
		}

		// merge configurations
		reduceOnTargetId.mergeWith(rhs.reduceOnTargetId);
		broadcastHighDegreeVertices.mergeWith(rhs.broadcastHighDegreeVertices);

		if (parallelism == PARALLELISM_DEFAULT) {
			// nothing set locally: adopt whatever the other instance has
			parallelism = rhs.parallelism;
		} else if (rhs.parallelism != PARALLELISM_DEFAULT) {
			// both set explicitly: keep the smaller of the two
			parallelism = Math.min(parallelism, rhs.parallelism);
		}
		// otherwise only this instance set a value, which is kept unchanged

		return true;
	}

	/*
	 * Implementation notes:
	 *
	 * The three leftOuterJoin below could be implemented more efficiently
	 * as an anti-join when available in Flink.
	 */

	/**
	 * Builds the filtered graph.
	 *
	 * <p>The vertex degrees are computed with {@link VertexDegree}, the IDs of
	 * vertices whose degree exceeds the configured maximum are collected, and
	 * left outer joins against that ID set keep only the vertices and edges
	 * that have no matching high-degree ID. Edges are joined twice, once per
	 * endpoint.
	 *
	 * @param input the graph to filter
	 * @return a graph built from the remaining vertices and edges, using the
	 *         input graph's execution context
	 * @throws Exception declared by the overridden method signature
	 */
	@Override
	public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input)
			throws Exception {
		// read the configuration once; it selects both the degree computation
		// and the order in which the edge endpoints are joined
		final boolean onTarget = reduceOnTargetId.get();

		// u, d(u)
		DataSet<Vertex<K, LongValue>> vertexDegree = input
			.run(new VertexDegree<K, VV, EV>()
				.setReduceOnTargetId(onTarget)
				.setParallelism(parallelism));

		// u if d(u) > maximumDegree
		DataSet<Tuple1<K>> highDegreeVertices = vertexDegree
			.flatMap(new DegreeFilter<K>(maximumDegree))
				.setParallelism(parallelism)
				.name("Filter high-degree vertices");

		JoinHint joinHint = broadcastHighDegreeVertices.get() ? JoinHint.BROADCAST_HASH_SECOND : JoinHint.REPARTITION_HASH_SECOND;

		// Vertices
		DataSet<Vertex<K, VV>> vertices = input
			.getVertices()
			.leftOuterJoin(highDegreeVertices, joinHint)
			.where(0)
			.equalTo(0)
			.with(new ProjectVertex<K, VV>())
				.setParallelism(parallelism)
				.name("Project low-degree vertices");

		// Edges: the endpoint used for counting degrees is joined first,
		// followed by the opposite endpoint (field 0 = source, 1 = target)
		final int firstField = onTarget ? 1 : 0;
		final int secondField = onTarget ? 0 : 1;
		final String firstEndpoint = onTarget ? "target" : "source";
		final String secondEndpoint = onTarget ? "source" : "target";

		DataSet<Edge<K, EV>> edges = input
			.getEdges()
			.leftOuterJoin(highDegreeVertices, joinHint)
			.where(firstField)
			.equalTo(0)
			.with(new ProjectEdge<K, EV>())
				.setParallelism(parallelism)
				.name("Project low-degree edges by " + firstEndpoint)
			.leftOuterJoin(highDegreeVertices, joinHint)
			.where(secondField)
			.equalTo(0)
			.with(new ProjectEdge<K, EV>())
				.setParallelism(parallelism)
				.name("Project low-degree edges by " + secondEndpoint);

		// Graph
		return Graph.fromDataSet(vertices, edges, input.getContext());
	}

	/**
	 * Emit vertices with degree greater than the given maximum.
	 *
	 * @param <K> ID type
	 */
	@ForwardedFields("0")
	private static class DegreeFilter<K>
	implements FlatMapFunction<Vertex<K, LongValue>, Tuple1<K>> {
		/** Degree threshold; only strictly greater degrees are emitted. */
		private final long maximumDegree;

		/** Output tuple instance reused for every emitted ID. */
		private final Tuple1<K> output = new Tuple1<>();

		/**
		 * @param maximumDegree degree threshold above which a vertex ID is emitted
		 */
		public DegreeFilter(long maximumDegree) {
			this.maximumDegree = maximumDegree;
		}

		/**
		 * Emits the vertex ID when the attached degree exceeds the threshold.
		 *
		 * @param value vertex ID paired with its degree
		 * @param out collector receiving the IDs of high-degree vertices
		 */
		@Override
		public void flatMap(Vertex<K, LongValue> value, Collector<Tuple1<K>> out)
				throws Exception {
			if (value.f1.getValue() > maximumDegree) {
				output.f0 = value.f0;
				out.collect(output);
			}
		}
	}

	/**
	 * Project vertex.
	 *
	 * @param <T> ID type
	 * @param <VT> vertex value type
	 */
	@ForwardedFieldsFirst("0; 1")
	private static class ProjectVertex<T, VT>
	implements FlatJoinFunction<Vertex<T, VT>, Tuple1<T>, Vertex<T, VT>> {
		/**
		 * Forwards the vertex only when the join found no matching ID.
		 *
		 * @param vertex vertex from the left side of the join
		 * @param id matching ID from the right side, or {@code null} if none
		 * @param out collector receiving the retained vertices
		 */
		@Override
		public void join(Vertex<T, VT> vertex, Tuple1<T> id, Collector<Vertex<T, VT>> out)
				throws Exception {
			if (id == null) {
				out.collect(vertex);
			}
		}
	}

	/**
	 * Project edge.
	 *
	 * @param <T> ID type
	 * @param <ET> edge value type
	 */
	@ForwardedFieldsFirst("0; 1; 2")
	private static class ProjectEdge<T, ET>
	implements FlatJoinFunction<Edge<T, ET>, Tuple1<T>, Edge<T, ET>> {
		/**
		 * Forwards the edge only when the join found no matching ID.
		 *
		 * @param edge edge from the left side of the join
		 * @param id matching ID from the right side, or {@code null} if none
		 * @param out collector receiving the retained edges
		 */
		@Override
		public void join(Edge<T, ET> edge, Tuple1<T> id, Collector<Edge<T, ET>> out)
				throws Exception {
			if (id == null) {
				out.collect(edge);
			}
		}
	}
}