/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.example.java.graph;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import eu.stratosphere.api.common.operators.Order;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.functions.GroupReduceFunction;
import eu.stratosphere.api.java.functions.JoinFunction;
import eu.stratosphere.api.java.functions.MapFunction;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.example.java.graph.util.EnumTrianglesData;
import eu.stratosphere.example.java.graph.util.EnumTrianglesDataTypes.Edge;
import eu.stratosphere.example.java.graph.util.EnumTrianglesDataTypes.Triad;
import eu.stratosphere.util.Collector;
/**
* Triangle enumeration is a preprocessing step to find closely connected parts in graphs.
* A triangle consists of three edges that connect three vertices with each other.
*
* <p>
* The algorithm works as follows:
* It groups all edges that share a common vertex and builds triads, i.e., triples of vertices
* that are connected by two edges. Finally, all triads are filtered for which no third edge exists
* that closes the triangle.
*
* <p>
* Input files are plain text files and must be formatted as follows:
* <ul>
* <li>Edges are represented as pairs for vertex IDs which are separated by space
* characters. Edges are separated by new-line characters.<br>
* For example <code>"1 2\n2 12\n1 12\n42 63\n"</code> gives four (undirected) edges (1)-(2), (2)-(12), (1)-(12), and (42)-(63)
* that include a triangle
* </ul>
* <pre>
* (1)
* / \
* (2)-(12)
* </pre>
*
* Usage: <code>EnumTriangleBasic <edge path> <result path></code><br>
* If no parameters are provided, the program is run with default data from {@link EnumTrianglesData}.
*
* <p>
* This example shows how to use:
* <ul>
* <li>Custom Java objects which extend Tuple
* <li>Group Sorting
* </ul>
*
*/
@SuppressWarnings("serial")
public class EnumTrianglesBasic {
static boolean fileOutput = false;
static String edgePath = null;
static String outputPath = null;
// *************************************************************************
// PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
if(!parseParameters(args)) {
return;
}
// set up execution environment
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// read input data
DataSet<Edge> edges = getEdgeDataSet(env);
// project edges by vertex id
DataSet<Edge> edgesById = edges
.map(new EdgeByIdProjector());
DataSet<Triad> triangles = edgesById
// build triads
.groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder())
// filter triads
.join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());
// emit result
if(fileOutput) {
triangles.writeAsCsv(outputPath, "\n", ",");
} else {
triangles.print();
}
// execute program
env.execute("Basic Triangle Enumeration Example");
}
// *************************************************************************
// USER FUNCTIONS
// *************************************************************************
/** Converts a Tuple2 into an Edge */
public static class TupleEdgeConverter extends MapFunction<Tuple2<Integer, Integer>, Edge> {
private final Edge outEdge = new Edge();
@Override
public Edge map(Tuple2<Integer, Integer> t) throws Exception {
outEdge.copyVerticesFromTuple2(t);
return outEdge;
}
}
/** Projects an edge (pair of vertices) such that the id of the first is smaller than the id of the second. */
private static class EdgeByIdProjector extends MapFunction<Edge, Edge> {
@Override
public Edge map(Edge inEdge) throws Exception {
// flip vertices if necessary
if(inEdge.getFirstVertex() > inEdge.getSecondVertex()) {
inEdge.flipVertices();
}
return inEdge;
}
}
/**
* Builds triads (triples of vertices) from pairs of edges that share a vertex.
* The first vertex of a triad is the shared vertex, the second and third vertex are ordered by vertexId.
* Assumes that input edges share the first vertex and are in ascending order of the second vertex.
*/
private static class TriadBuilder extends GroupReduceFunction<Edge, Triad> {
private final List<Integer> vertices = new ArrayList<Integer>();
private final Triad outTriad = new Triad();
@Override
public void reduce(Iterator<Edge> edges, Collector<Triad> out) throws Exception {
// clear vertex list
vertices.clear();
// read first edge
Edge firstEdge = edges.next();
outTriad.setFirstVertex(firstEdge.getFirstVertex());
vertices.add(firstEdge.getSecondVertex());
// build and emit triads
while(edges.hasNext()) {
Integer higherVertexId = edges.next().getSecondVertex();
// combine vertex with all previously read vertices
for(Integer lowerVertexId : vertices) {
outTriad.setSecondVertex(lowerVertexId);
outTriad.setThirdVertex(higherVertexId);
out.collect(outTriad);
}
vertices.add(higherVertexId);
}
}
}
/** Filters triads (three vertices connected by two edges) without a closing third edge. */
private static class TriadFilter extends JoinFunction<Triad, Edge, Triad> {
@Override
public Triad join(Triad triad, Edge edge) throws Exception {
return triad;
}
}
// *************************************************************************
// UTIL METHODS
// *************************************************************************
private static boolean parseParameters(String[] args) {
if(args.length > 0) {
// parse input arguments
fileOutput = true;
if(args.length == 2) {
edgePath = args[0];
outputPath = args[1];
} else {
System.err.println("Usage: EnumTriangleBasic <edge path> <result path>");
return false;
}
} else {
System.out.println("Executing Enum Triangles Basic example with built-in default data.");
System.out.println(" Provide parameters to read input data from files.");
System.out.println(" See the documentation for the correct format of input files.");
System.out.println(" Usage: EnumTriangleBasic <edge path> <result path>");
}
return true;
}
private static DataSet<Edge> getEdgeDataSet(ExecutionEnvironment env) {
if(fileOutput) {
return env.readCsvFile(edgePath)
.fieldDelimiter(' ')
.includeFields(true, true)
.types(Integer.class, Integer.class)
.map(new TupleEdgeConverter());
} else {
return EnumTrianglesData.getDefaultEdgeDataSet(env);
}
}
}