/*
 * This file is part of Gradoop.
 *
 * Gradoop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Gradoop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Gradoop. If not, see <http://www.gnu.org/licenses/>.
 */

package org.gradoop.benchmark.patternmatching;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.io.FileUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.FunctionAnnotation;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple3;
import org.gradoop.examples.AbstractRunner;
import org.gradoop.flink.model.impl.operators.matching.common.query.DFSTraverser;
import org.gradoop.flink.model.impl.operators.matching.common.query.QueryHandler;
import org.gradoop.flink.model.impl.operators.matching.common.query.TraversalCode;
import org.gradoop.flink.model.impl.operators.matching.common.query.Traverser;
import org.gradoop.flink.model.impl.operators.matching.common.tuples.IdWithCandidates;
import org.gradoop.flink.model.impl.operators.matching.common.tuples.TripleWithCandidates;
import org.gradoop.flink.model.impl.operators.matching.single.preserving.explorative.traverser.TraverserStrategy;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Locale;

/**
 * Base class for traverser benchmarks.
 *
 * Parses the command line options shared by all traverser benchmarks, derives the
 * {@link TraversalCode} for the requested query and reports the result of a run either
 * to a CSV file or to standard out.
 */
abstract class TraverserBenchmark extends AbstractRunner {
  /**
   * Option to declare path to input graph
   */
  private static final String OPTION_INPUT_PATH = "i";
  /**
   * Option to declare the query (identifier of a predefined query or a GDL pattern)
   */
  private static final String OPTION_QUERY = "q";
  /**
   * Option to set the traverser
   */
  private static final String OPTION_TRAVERSER = "t";
  /**
   * Option to declare path to CSV log file
   */
  private static final String OPTION_CSV_PATH = "csv";
  /**
   * Encoding used for every write to the CSV file (creation and append).
   */
  private static final String CSV_ENCODING = "UTF-8";

  static {
    OPTIONS.addOption(OPTION_INPUT_PATH, "input", true, "Graph directory");
    OPTIONS.addOption(OPTION_QUERY, "query", true, "Pattern or fixed query");
    OPTIONS.addOption(OPTION_TRAVERSER, "traverser", true,
      "[set-pair-for|set-pair-bulk|triple-for]");
    OPTIONS.addOption(OPTION_CSV_PATH, "csv-path", true, "Path to output CSV file");
  }

  /**
   * Path to input graph data
   */
  private final String inputPath;
  /**
   * Query to execute.
   */
  private final String query;
  /**
   * Traverser strategy
   */
  private final TraverserStrategy traverserStrategy;
  /**
   * Path to CSV output, {@code null} if results are printed to system out
   */
  private final String csvPath;
  /**
   * Number of query vertices
   */
  private int vertexCount;
  /**
   * Number of query edges
   */
  private int edgeCount;
  /**
   * Number of embeddings found by the traverser
   */
  private long embeddingCount;
  /**
   * Traversal code to process the query.
   */
  private TraversalCode tc;

  /**
   * Constructor
   *
   * @param cmd commandline
   * @throws IllegalArgumentException if the query or traverser option is missing, or if the
   *                                  traverser strategy or fixed query is unknown
   */
  TraverserBenchmark(CommandLine cmd) {
    this.inputPath = cmd.getOptionValue(OPTION_INPUT_PATH);
    this.query = getRequiredOptionValue(cmd, OPTION_QUERY);
    this.traverserStrategy =
      parseTraverserStrategy(getRequiredOptionValue(cmd, OPTION_TRAVERSER));
    this.csvPath = cmd.hasOption(OPTION_CSV_PATH) ?
      cmd.getOptionValue(OPTION_CSV_PATH) : null;

    initialize();
  }

  /**
   * Reads the value of an option that the benchmark cannot run without.
   *
   * @param cmd    commandline
   * @param option short name of the option
   * @return option value, never {@code null}
   * @throws IllegalArgumentException if the option has no value
   */
  private static String getRequiredOptionValue(CommandLine cmd, String option) {
    String value = cmd.getOptionValue(option);
    if (value == null) {
      throw new IllegalArgumentException("Missing required option: -" + option);
    }
    return value;
  }

  /**
   * Maps the command line name of a traverser strategy to the corresponding enum value.
   *
   * @param strategyString strategy name, matched case-insensitively
   * @return traverser strategy
   * @throws IllegalArgumentException if the name is not one of the supported strategies
   */
  private static TraverserStrategy parseTraverserStrategy(String strategyString) {
    // Locale.ROOT keeps the mapping independent of the default locale
    // (e.g. 'I' is not lower-cased to 'i' under a Turkish locale).
    String normalized = strategyString.toLowerCase(Locale.ROOT);
    switch (normalized) {
    case "set-pair-bulk":
      return TraverserStrategy.SET_PAIR_BULK_ITERATION;
    case "set-pair-for":
      return TraverserStrategy.SET_PAIR_FOR_LOOP_ITERATION;
    case "triple-for":
      return TraverserStrategy.TRIPLES_FOR_LOOP_ITERATION;
    default:
      throw new IllegalArgumentException("Unknown traverser strategy: " + normalized);
    }
  }

  /**
   * Initialize the benchmark using a given query. The query can be a predefined one (e.g. q0) or
   * a GDL pattern (e.g. (a)-->(b)).
   *
   * Every query string starting with "q" (case-insensitive) is treated as the identifier of a
   * predefined query; anything else is handed to the {@link QueryHandler} as a pattern.
   */
  private void initialize() {
    String normalizedQuery = query.toLowerCase(Locale.ROOT);
    if (normalizedQuery.startsWith("q")) {
      // fixed query
      Queries.Query q = getQuery(normalizedQuery);
      vertexCount = q.getVertexCount();
      edgeCount = q.getEdgeCount();
      tc = q.getTraversalCode();
    } else {
      // GDL query pattern
      QueryHandler queryHandler = new QueryHandler(query);
      vertexCount = queryHandler.getVertexCount();
      edgeCount = queryHandler.getEdgeCount();
      Traverser traverser = new DFSTraverser();
      traverser.setQueryHandler(queryHandler);
      tc = traverser.traverse();
    }
  }

  /**
   * Returns a string containing information about the benchmark run.
   *
   * The fields are separated by '|' in the order: input path, parallelism, strategy, query,
   * embedding count, net runtime of the last executed job.
   *
   * @return benchmark result string
   */
  private String getResultString() {
    return String.format("%s|%s|%s|%s|%s|%s",
      inputPath,
      getExecutionEnvironment().getParallelism(),
      traverserStrategy.name(),
      query,
      embeddingCount,
      getExecutionEnvironment().getLastJobExecutionResult().getNetRuntime());
  }

  /**
   * Writes the results of the benchmark into the given csv file. If the file already exists,
   * the results are appended; otherwise the file is created with a header line.
   *
   * @param csvFile path to csv file
   * @throws IOException if the file cannot be created or written
   */
  private void writeResults(String csvFile) throws IOException {
    String header = "Input|Parallelism|Strategy|Query|Embeddings|Runtime[ms]";
    String line = getResultString();
    File f = new File(csvFile);
    if (f.exists() && !f.isDirectory()) {
      // append with the same encoding that is used when the file is created
      FileUtils.writeStringToFile(f, String.format("%s%n", line), CSV_ENCODING, true);
    } else {
      // try-with-resources closes the writer even if writing fails
      try (PrintWriter writer = new PrintWriter(csvFile, CSV_ENCODING)) {
        writer.println(header);
        writer.println(line);
        // PrintWriter swallows I/O errors; surface them to the caller
        if (writer.checkError()) {
          throw new IOException("Could not write benchmark results to " + csvFile);
        }
      }
    }
  }

  /**
   * Prints the results of the benchmark to system out.
   */
  private void printResults() {
    System.out.println(getResultString());
  }

  /**
   * Returns the number of query vertices.
   *
   * @return number of query vertices
   */
  int getVertexCount() {
    return vertexCount;
  }

  /**
   * Returns the number of query edges.
   *
   * @return number of query edges
   */
  int getEdgeCount() {
    return edgeCount;
  }

  /**
   * Returns the traversal code used to process the query.
   *
   * @return traversal code
   */
  TraversalCode getTraversalCode() {
    return tc;
  }

  /**
   * Returns the path to the input graph.
   *
   * @return input path as given on the command line
   */
  String getInputPath() {
    return inputPath;
  }

  /**
   * Sets the number of embeddings that is reported in the benchmark result.
   *
   * @param embeddingCount number of embeddings found by the traverser
   */
  void setEmbeddingCount(long embeddingCount) {
    this.embeddingCount = embeddingCount;
  }

  /**
   * Returns the traverser strategy selected on the command line.
   *
   * @return traverser strategy
   */
  TraverserStrategy getTraverserStrategy() {
    return traverserStrategy;
  }

  /**
   * Run the benchmark.
   *
   * @throws Exception if the benchmark execution fails
   */
  abstract void run() throws Exception;

  /**
   * Writes the results to the CSV file if a path was given, otherwise prints them to system out.
   *
   * @throws IOException if the CSV file cannot be written
   */
  void close() throws IOException {
    if (csvPath != null) {
      writeResults(csvPath);
    } else {
      printResults();
    }
  }

  /**
   * Returns the query based on the input string
   *
   * @param queryString query identifier (q0, q1, ..., q9)
   * @return query
   * @throws IllegalArgumentException if the identifier is not supported
   */
  private static Queries.Query getQuery(String queryString) {
    switch (queryString) {
    case "q0":
      return Queries.q0();
    case "q1":
      return Queries.q1();
    case "q2":
      return Queries.q2();
    case "q3":
      return Queries.q3();
    case "q4":
      return Queries.q4();
    case "q5":
      return Queries.q5();
    case "q6":
      return Queries.q6();
    case "q7":
      return Queries.q7();
    case "q8":
      return Queries.q8();
    case "q9":
      return Queries.q9();
    default:
      throw new IllegalArgumentException("unsupported query: " + queryString);
    }
  }

  /**
   * Initializes {@link IdWithCandidates}
   */
  @FunctionAnnotation.ForwardedFields("f0")
  public static class GetIdWithCandidates
    implements MapFunction<Tuple1<Long>, IdWithCandidates<Long>> {
    /**
     * Reduce object instantiations
     */
    private final IdWithCandidates<Long> reuseTuple;

    /**
     * Constructor
     *
     * Prepares the reused tuple with a candidate array of length {@code vertexCount} in which
     * every entry is {@code true}.
     *
     * @param vertexCount number of query vertices
     */
    public GetIdWithCandidates(int vertexCount) {
      reuseTuple = new IdWithCandidates<>();
      boolean[] candidates = new boolean[vertexCount];
      Arrays.fill(candidates, true);
      reuseTuple.setCandidates(candidates);
    }

    /**
     * Sets the id of the reused tuple to the input id.
     *
     * @param value tuple containing the id
     * @return the reused tuple; the same instance is returned on every call
     * @throws Exception declared by {@link MapFunction}
     */
    @Override
    public IdWithCandidates<Long> map(Tuple1<Long> value) throws Exception {
      reuseTuple.setId(value.f0);
      return reuseTuple;
    }
  }

  /**
   * Initializes {@link TripleWithCandidates}
   */
  @FunctionAnnotation.ForwardedFields("f0;f1;f2")
  public static class GetTriplesWithCandidates
    implements MapFunction<Tuple3<Long, Long, Long>, TripleWithCandidates<Long>> {
    /**
     * Reduce object instantiations
     */
    private final TripleWithCandidates<Long> reuseTuple;

    /**
     * Constructor
     *
     * Prepares the reused tuple with a candidate array of length {@code edgeCount} in which
     * every entry is {@code true}.
     *
     * @param edgeCount number of query edges
     */
    public GetTriplesWithCandidates(int edgeCount) {
      reuseTuple = new TripleWithCandidates<>();
      boolean[] candidates = new boolean[edgeCount];
      Arrays.fill(candidates, true);
      reuseTuple.setCandidates(candidates);
    }

    /**
     * Copies edge id (f0), source id (f1) and target id (f2) of the input into the reused tuple.
     *
     * @param value tuple of edge id, source id and target id
     * @return the reused tuple; the same instance is returned on every call
     * @throws Exception declared by {@link MapFunction}
     */
    @Override
    public TripleWithCandidates<Long> map(Tuple3<Long, Long, Long> value) throws Exception {
      reuseTuple.setEdgeId(value.f0);
      reuseTuple.setSourceId(value.f1);
      reuseTuple.setTargetId(value.f2);
      return reuseTuple;
    }
  }
}