TraverserBenchmark.java example

Explorer
gradoop-master
/*
 * This file is part of Gradoop.
 *
 * Gradoop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Gradoop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Gradoop. If not, see <http://www.gnu.org/licenses/>.
 */

package org.gradoop.benchmark.patternmatching;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.io.FileUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.FunctionAnnotation;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple3;
import org.gradoop.examples.AbstractRunner;
import org.gradoop.flink.model.impl.operators.matching.common.query.DFSTraverser;
import org.gradoop.flink.model.impl.operators.matching.common.query.QueryHandler;
import org.gradoop.flink.model.impl.operators.matching.common.query.TraversalCode;
import org.gradoop.flink.model.impl.operators.matching.common.query.Traverser;
import org.gradoop.flink.model.impl.operators.matching.common.tuples.IdWithCandidates;
import org.gradoop.flink.model.impl.operators.matching.common.tuples.TripleWithCandidates;
import org.gradoop.flink.model.impl.operators.matching.single.preserving.explorative.traverser.TraverserStrategy;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Locale;

/**
 * Base class for traverser benchmarks
 */
abstract class TraverserBenchmark extends AbstractRunner {
  /**
   * Short name of the command line option that declares the path to the input graph
   */
  private static final String OPTION_INPUT_PATH = "i";
  /**
   * Short name of the command line option that declares the query to execute (a predefined
   * query identifier or a GDL pattern)
   */
  private static final String OPTION_QUERY = "q";
  /**
   * Short name of the command line option that sets the traverser
   */
  private static final String OPTION_TRAVERSER = "t";
  /**
   * Short name of the command line option that declares the path to the CSV output file
   */
  private static final String OPTION_CSV_PATH = "csv";

  // Registers the benchmark options (short name, long name, has-argument flag, description).
  // NOTE(review): OPTIONS is not declared in this file; presumably inherited from
  // AbstractRunner - confirm.
  static {
    OPTIONS.addOption(OPTION_INPUT_PATH, "input", true, "Graph directory");
    OPTIONS.addOption(OPTION_QUERY, "query", true, "Pattern or fixed query");
    OPTIONS.addOption(OPTION_TRAVERSER, "traverser", true,
      "[set-pair-for|set-pair-bulk|triple-for]");
    OPTIONS.addOption(OPTION_CSV_PATH, "csv-path", true, "Path to output CSV file");
  }

  /**
   * Path to input graph data, as given by the input option
   */
  private final String inputPath;
  /**
   * Query to execute: either a predefined query identifier (e.g. q0) or a GDL pattern
   */
  private final String query;
  /**
   * Traverser strategy selected by the traverser option
   */
  private final TraverserStrategy traverserStrategy;
  /**
   * Path to CSV output. Stays {@code null} if the CSV option was not given; in that case the
   * results are printed to system out instead of being written to a file.
   */
  private String csvPath;

  /**
   * Number of query vertices, determined while initializing the query
   */
  private int vertexCount;
  /**
   * Number of query edges, determined while initializing the query
   */
  private int edgeCount;
  /**
   * Number of embeddings found by the traverser; has to be reported through
   * {@link #setEmbeddingCount(long)}, otherwise it keeps its default value 0
   */
  private long embeddingCount;
  /**
   * Traversal code to process the query.
   */
  private TraversalCode tc;

  /**
   * Creates the benchmark from the parsed command line: reads input path, query, traverser
   * strategy and the optional CSV path and initializes the query afterwards.
   *
   * @param cmd commandline
   * @throws IllegalArgumentException if the query or the traverser option has no value, if the
   *                                  traverser strategy is unknown or if a predefined query
   *                                  identifier is not supported
   */
  TraverserBenchmark(CommandLine cmd) {
    this.inputPath = cmd.getOptionValue(OPTION_INPUT_PATH);
    // fail with a descriptive message instead of a NullPointerException further down
    this.query = requireOptionValue(cmd, OPTION_QUERY);

    // Locale.ROOT keeps the matching independent of the default locale; e.g. a Turkish
    // locale lower-cases 'I' to a dotless 'ı', so "SET-PAIR-FOR" would not match.
    String traverserStrategyString =
      requireOptionValue(cmd, OPTION_TRAVERSER).toLowerCase(Locale.ROOT);

    switch (traverserStrategyString) {
    case "set-pair-bulk":
      this.traverserStrategy = TraverserStrategy.SET_PAIR_BULK_ITERATION;
      break;
    case "set-pair-for":
      this.traverserStrategy = TraverserStrategy.SET_PAIR_FOR_LOOP_ITERATION;
      break;
    case "triple-for":
      this.traverserStrategy = TraverserStrategy.TRIPLES_FOR_LOOP_ITERATION;
      break;
    default:
      throw new IllegalArgumentException("Unknown traverser strategy: " + traverserStrategyString);
    }

    // the CSV path is optional; without it the results are printed instead of written
    if (cmd.hasOption(OPTION_CSV_PATH)) {
      csvPath = cmd.getOptionValue(OPTION_CSV_PATH);
    }
    initialize();
  }

  /**
   * Returns the value of an option that must be present on the command line.
   *
   * @param cmd    commandline
   * @param option short name of the option
   * @return option value, never {@code null}
   * @throws IllegalArgumentException if the option has no value
   */
  private static String requireOptionValue(CommandLine cmd, String option) {
    String value = cmd.getOptionValue(option);
    if (value == null) {
      throw new IllegalArgumentException("Missing value for required option: -" + option);
    }
    return value;
  }

  /**
   * Prepares vertex count, edge count and traversal code for the configured query. A query
   * whose lower-cased form starts with "q" is looked up as a predefined query (e.g. q0), any
   * other input is handled as a GDL pattern (e.g. (a)-->(b)) and traversed with a
   * {@link DFSTraverser}.
   */
  private void initialize() {
    String normalizedQuery = query.toLowerCase();

    if (!normalizedQuery.startsWith("q")) {
      // GDL query pattern: derive counts and traversal code from the parsed pattern
      QueryHandler handler = new QueryHandler(query);
      vertexCount = handler.getVertexCount();
      edgeCount = handler.getEdgeCount();
      Traverser dfsTraverser = new DFSTraverser();
      dfsTraverser.setQueryHandler(handler);
      tc = dfsTraverser.traverse();
      return;
    }

    // fixed query: everything is provided by the predefined query object
    Queries.Query fixedQuery = getQuery(normalizedQuery);
    vertexCount = fixedQuery.getVertexCount();
    edgeCount = fixedQuery.getEdgeCount();
    tc = fixedQuery.getTraversalCode();
  }

  /**
   * Builds the result line of the benchmark run. The fields are separated by '|' and appear in
   * this order: input path, parallelism, traverser strategy, query, embedding count and net
   * runtime of the last executed job.
   *
   * @return benchmark result string
   */
  private String getResultString() {
    String parallelism = String.valueOf(getExecutionEnvironment().getParallelism());
    String netRuntime = String.valueOf(
      getExecutionEnvironment().getLastJobExecutionResult().getNetRuntime());

    return inputPath + "|" +
      parallelism + "|" +
      traverserStrategy.name() + "|" +
      query + "|" +
      embeddingCount + "|" +
      netRuntime;
  }

  /**
   * Writes the results of the benchmark into the given csv file. If the file already exists,
   * the result line is appended; otherwise the file is created and starts with a header line.
   * Both cases encode the content as UTF-8.
   *
   * @param csvFile path to csv file
   * @throws IOException if the file cannot be created or writing to it fails
   */
  private void writeResults(String csvFile) throws IOException {
    String header = "Input|Parallelism|Strategy|Query|Embeddings|Runtime[ms]";
    String line = getResultString();

    File f = new File(csvFile);
    if (f.exists() && !f.isDirectory()) {
      // append with the same encoding that is used when the file is created below
      FileUtils.writeStringToFile(f, String.format("%s%n", line), "UTF-8", true);
    } else {
      // try-with-resources closes the writer even if writing fails
      try (PrintWriter writer = new PrintWriter(csvFile, "UTF-8")) {
        writer.println(header);
        writer.println(line);
        // PrintWriter swallows IOExceptions; checkError() flushes and reports them
        if (writer.checkError()) {
          throw new IOException("Could not write benchmark results to " + csvFile);
        }
      }
    }
  }

  /**
   * Writes the benchmark result line to system out.
   */
  private void printResults() {
    String resultLine = getResultString();
    System.out.println(resultLine);
  }

  /**
   * Returns the number of query vertices.
   *
   * @return query vertex count
   */
  int getVertexCount() {
    return this.vertexCount;
  }

  /**
   * Returns the number of query edges.
   *
   * @return query edge count
   */
  int getEdgeCount() {
    return this.edgeCount;
  }

  /**
   * Returns the traversal code used to process the query.
   *
   * @return traversal code
   */
  TraversalCode getTraversalCode() {
    return this.tc;
  }

  /**
   * Returns the path to the input graph data.
   *
   * @return input path
   */
  String getInputPath() {
    return this.inputPath;
  }

  /**
   * Sets the number of embeddings found by the traverser. The value is part of the benchmark
   * result string.
   *
   * @param embeddingCount number of embeddings
   */
  void setEmbeddingCount(long embeddingCount) {
    this.embeddingCount = embeddingCount;
  }

  /**
   * Returns the traverser strategy selected on the command line.
   *
   * @return traverser strategy
   */
  TraverserStrategy getTraverserStrategy() {
    return this.traverserStrategy;
  }


  /**
   * Run the benchmark. Has to be implemented by the concrete benchmark.
   *
   * @throws Exception if the benchmark execution fails
   */
  abstract void run() throws Exception;

  /**
   * Reports the benchmark results: they are printed to system out if no CSV path has been
   * set, otherwise they are written to the CSV file.
   *
   * @throws IOException if writing the CSV file fails
   */
  void close() throws IOException {
    if (csvPath == null) {
      printResults();
      return;
    }
    writeResults(csvPath);
  }

  /**
   * Looks up the predefined query that belongs to the given identifier.
   *
   * @param queryString query identifier (q0, q1, ..., q9)
   * @return query
   * @throws IllegalArgumentException if the identifier is not one of q0 to q9
   */
  private static Queries.Query getQuery(String queryString) {
    switch (queryString) {
    case "q0":
      return Queries.q0();
    case "q1":
      return Queries.q1();
    case "q2":
      return Queries.q2();
    case "q3":
      return Queries.q3();
    case "q4":
      return Queries.q4();
    case "q5":
      return Queries.q5();
    case "q6":
      return Queries.q6();
    case "q7":
      return Queries.q7();
    case "q8":
      return Queries.q8();
    case "q9":
      return Queries.q9();
    default:
      throw new IllegalArgumentException("unsupported query: " + queryString);
    }
  }

  /**
   * Initializes {@link IdWithCandidates}: every input id is combined with a candidate array
   * in which all entries are {@code true}.
   */
  @FunctionAnnotation.ForwardedFields("f0")
  public static class GetIdWithCandidates
    implements MapFunction<Tuple1<Long>, IdWithCandidates<Long>> {
    /**
     * Reduce object instantiations
     */
    private final IdWithCandidates<Long> reuseTuple;

    /**
     * Constructor
     *
     * @param vertexCount number of query vertices, used as length of the candidate array
     */
    public GetIdWithCandidates(int vertexCount) {
      reuseTuple = new IdWithCandidates<>();
      boolean[] candidates = new boolean[vertexCount];
      // initially, every entry of the candidate array is set
      Arrays.fill(candidates, true);
      reuseTuple.setCandidates(candidates);
    }

    /**
     * Sets the given id on the reused tuple and returns it.
     *
     * @param value tuple containing the id
     * @return the reused tuple; the same instance is returned on every call
     */
    @Override
    public IdWithCandidates<Long> map(Tuple1<Long> value) throws Exception {
      reuseTuple.setId(value.f0);
      return reuseTuple;
    }
  }

  /**
   * Initializes {@link TripleWithCandidates}: every input triple (edge id, source id,
   * target id) is combined with a candidate array in which all entries are {@code true}.
   */
  @FunctionAnnotation.ForwardedFields("f0;f1;f2")
  public static class GetTriplesWithCandidates implements
    MapFunction<Tuple3<Long, Long, Long>, TripleWithCandidates<Long>> {
    /**
     * Reduce object instantiations
     */
    private final TripleWithCandidates<Long> reuseTuple;

    /**
     * Constructor
     *
     * @param edgeCount number of query edges, used as length of the candidate array
     */
    public GetTriplesWithCandidates(int edgeCount) {
      reuseTuple = new TripleWithCandidates<>();
      boolean[] candidates = new boolean[edgeCount];
      // initially, every entry of the candidate array is set
      Arrays.fill(candidates, true);
      reuseTuple.setCandidates(candidates);
    }

    /**
     * Copies edge id (f0), source id (f1) and target id (f2) to the reused tuple and
     * returns it.
     *
     * @param value tuple containing edge id, source id and target id
     * @return the reused tuple; the same instance is returned on every call
     */
    @Override
    public TripleWithCandidates<Long> map(
      Tuple3<Long, Long, Long> value) throws Exception {
      reuseTuple.setEdgeId(value.f0);
      reuseTuple.setSourceId(value.f1);
      reuseTuple.setTargetId(value.f2);
      return reuseTuple;
    }
  }
}