/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.graph; import org.apache.commons.lang3.StringEscapeUtils; import org.apache.commons.lang3.text.StrBuilder; import org.apache.flink.api.common.ExecutionConfig; import org.apache.flink.api.java.ExecutionEnvironment; import org.apache.flink.api.java.io.CsvOutputFormat; import org.apache.flink.api.java.utils.ParameterTool; import org.apache.flink.client.program.ProgramParametrizationException; import org.apache.flink.graph.drivers.AdamicAdar; import org.apache.flink.graph.drivers.ClusteringCoefficient; import org.apache.flink.graph.drivers.ConnectedComponents; import org.apache.flink.graph.drivers.Driver; import org.apache.flink.graph.drivers.EdgeList; import org.apache.flink.graph.drivers.GraphMetrics; import org.apache.flink.graph.drivers.HITS; import org.apache.flink.graph.drivers.JaccardIndex; import org.apache.flink.graph.drivers.PageRank; import org.apache.flink.graph.drivers.TriangleListing; import org.apache.flink.graph.drivers.input.CirculantGraph; import org.apache.flink.graph.drivers.input.CompleteGraph; import org.apache.flink.graph.drivers.input.CycleGraph; import org.apache.flink.graph.drivers.input.EchoGraph; import org.apache.flink.graph.drivers.input.EmptyGraph; import org.apache.flink.graph.drivers.input.GridGraph; import org.apache.flink.graph.drivers.input.HypercubeGraph; import org.apache.flink.graph.drivers.input.Input; import org.apache.flink.graph.drivers.input.PathGraph; import org.apache.flink.graph.drivers.input.RMatGraph; import org.apache.flink.graph.drivers.input.SingletonEdgeGraph; import org.apache.flink.graph.drivers.input.StarGraph; import org.apache.flink.graph.drivers.output.Hash; import org.apache.flink.graph.drivers.output.Print; import org.apache.flink.graph.drivers.parameter.Parameterized; import org.apache.flink.util.InstantiationUtil; import java.util.ArrayList; import java.util.Iterator; import java.util.List; /** * This default main class executes Flink drivers. * * An execution has one input, one algorithm, and one output. Anything more * complex can be expressed as a user program written in a JVM language. * * Inputs and algorithms are decoupled by, respectively, producing and * consuming a graph. Currently only {@code Graph} is supported but later * updates may add support for new graph types such as {@code BipartiteGraph}. * * Algorithms must explicitly support each type of output via implementation of * interfaces. This is scalable as the number of outputs is small and finite. */ public class Runner { private static final String INPUT = "input"; private static final String ALGORITHM = "algorithm"; private static final String OUTPUT = "output"; private static ParameterizedFactory<Input> inputFactory = new ParameterizedFactory<Input>() .addClass(CirculantGraph.class) .addClass(CompleteGraph.class) .addClass(org.apache.flink.graph.drivers.input.CSV.class) .addClass(CycleGraph.class) .addClass(EchoGraph.class) .addClass(EmptyGraph.class) .addClass(GridGraph.class) .addClass(HypercubeGraph.class) .addClass(PathGraph.class) .addClass(RMatGraph.class) .addClass(SingletonEdgeGraph.class) .addClass(StarGraph.class); private static ParameterizedFactory<Driver> driverFactory = new ParameterizedFactory<Driver>() .addClass(AdamicAdar.class) .addClass(ClusteringCoefficient.class) .addClass(ConnectedComponents.class) .addClass(EdgeList.class) .addClass(GraphMetrics.class) .addClass(HITS.class) .addClass(JaccardIndex.class) .addClass(PageRank.class) .addClass(TriangleListing.class); /** * List available algorithms. This is displayed to the user when no valid * algorithm is given in the program parameterization. * * @return usage string listing available algorithms */ private static String getAlgorithmsListing() { StrBuilder strBuilder = new StrBuilder(); strBuilder .appendNewLine() .appendln("Select an algorithm to view usage: flink run examples/flink-gelly-examples_<version>.jar --algorithm <algorithm>") .appendNewLine() .appendln("Available algorithms:"); for (Driver algorithm : driverFactory) { strBuilder.append(" ") .appendFixedWidthPadRight(algorithm.getName(), 30, ' ') .append(algorithm.getShortDescription()).appendNewLine(); } return strBuilder.toString(); } /** * Display the usage for the given algorithm. This includes options for all * compatible inputs, the selected algorithm, and outputs implemented by * the selected algorithm. * * @param algorithmName unique identifier of the selected algorithm * @return usage string for the given algorithm */ private static String getAlgorithmUsage(String algorithmName) { StrBuilder strBuilder = new StrBuilder(); Driver algorithm = driverFactory.get(algorithmName); strBuilder .appendNewLine() .appendNewLine() .appendln(algorithm.getLongDescription()) .appendNewLine() .append("usage: flink run examples/flink-gelly-examples_<version>.jar --algorithm ") .append(algorithmName) .append(" [algorithm options] --input <input> [input options] --output <output> [output options]") .appendNewLine() .appendNewLine() .appendln("Available inputs:"); for (Input input : inputFactory) { strBuilder .append(" --input ") .append(input.getName()) .append(" ") .appendln(input.getUsage()); } String algorithmParameterization = algorithm.getUsage(); if (algorithmParameterization.length() > 0) { strBuilder .appendNewLine() .appendln("Algorithm configuration:") .append(" ") .appendln(algorithm.getUsage()); } strBuilder .appendNewLine() .appendln("Available outputs:"); if (algorithm instanceof org.apache.flink.graph.drivers.output.CSV) { strBuilder.appendln(" --output csv --output_filename FILENAME [--output_line_delimiter LINE_DELIMITER] [--output_field_delimiter FIELD_DELIMITER]"); } if (algorithm instanceof Hash) { strBuilder.appendln(" --output hash"); } if (algorithm instanceof Print) { strBuilder.appendln(" --output print"); } return strBuilder .appendNewLine() .toString(); } public static void main(String[] args) throws Exception { // Set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); ExecutionConfig config = env.getConfig(); // should not have any non-Flink data types config.disableAutoTypeRegistration(); config.disableForceAvro(); config.disableForceKryo(); ParameterTool parameters = ParameterTool.fromArgs(args); config.setGlobalJobParameters(parameters); // integration tests run with with object reuse both disabled and enabled if (parameters.has("__disable_object_reuse")) { config.disableObjectReuse(); } else { config.enableObjectReuse(); } // Usage if (!parameters.has(ALGORITHM)) { throw new ProgramParametrizationException(getAlgorithmsListing()); } String algorithmName = parameters.get(ALGORITHM); Driver algorithm = driverFactory.get(algorithmName); if (algorithm == null) { throw new ProgramParametrizationException("Unknown algorithm name: " + algorithmName); } if (!parameters.has(INPUT)) { if (!parameters.has(OUTPUT)) { // if neither input nor output is given then print algorithm usage throw new ProgramParametrizationException(getAlgorithmUsage(algorithmName)); } throw new ProgramParametrizationException("No input given"); } String inputName = parameters.get(INPUT); Input input = inputFactory.get(inputName); if (input == null) { throw new ProgramParametrizationException("Unknown input type: " + inputName); } // Input try { input.configure(parameters); } catch (RuntimeException ex) { throw new ProgramParametrizationException(ex.getMessage()); } Graph graph = input.create(env); // Algorithm algorithm.configure(parameters); algorithm.plan(graph); // Output if (!parameters.has(OUTPUT)) { throw new ProgramParametrizationException("No output given"); } String outputName = parameters.get(OUTPUT); String executionNamePrefix = input.getIdentity() + " -> " + algorithmName + " -> "; System.out.println(); switch (outputName.toLowerCase()) { case "csv": if (algorithm instanceof org.apache.flink.graph.drivers.output.CSV) { String filename = parameters.getRequired("output_filename"); String lineDelimiter = StringEscapeUtils.unescapeJava( parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER)); String fieldDelimiter = StringEscapeUtils.unescapeJava( parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER)); org.apache.flink.graph.drivers.output.CSV c = (org.apache.flink.graph.drivers.output.CSV) algorithm; c.writeCSV(filename, lineDelimiter, fieldDelimiter); env.execute(executionNamePrefix + "CSV"); } else { throw new ProgramParametrizationException("Algorithm does not support output type 'CSV'"); } break; case "hash": if (algorithm instanceof Hash) { Hash h = (Hash) algorithm; h.hash(executionNamePrefix + "Hash"); } else { throw new ProgramParametrizationException("Algorithm does not support output type 'hash'"); } break; case "print": if (algorithm instanceof Print) { Print h = (Print) algorithm; h.print(executionNamePrefix + "Print"); } else { throw new ProgramParametrizationException("Algorithm does not support output type 'print'"); } break; default: throw new ProgramParametrizationException("Unknown output type: " + outputName); } } /** * Stores a list of classes for which an instance can be requested by name * and implements an iterator over class instances. * * @param <T> base type for stored classes */ private static class ParameterizedFactory<T extends Parameterized> implements Iterable<T> { private List<Class<? extends T>> classes = new ArrayList<>(); /** * Add a class to the factory. * * @param cls subclass of T * @return this */ public ParameterizedFactory<T> addClass(Class<? extends T> cls) { this.classes.add(cls); return this; } /** * Obtain a class instance by name. * * @param name String matching getName() * @return class instance or null if no matching class */ public T get(String name) { for (T instance : this) { if (name.equals(instance.getName())) { return instance; } } return null; } @Override public Iterator<T> iterator() { return new Iterator<T>() { private int index; @Override public boolean hasNext() { return index < classes.size(); } @Override public T next() { return InstantiationUtil.instantiate(classes.get(index++)); } @Override public void remove() { throw new UnsupportedOperationException(); } }; } } }