/*
* This file is part of Gradoop.
*
* Gradoop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Gradoop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Gradoop. If not, see <http://www.gnu.org/licenses/>.
*/
package org.gradoop.benchmark.patternmatching;
import org.apache.commons.cli.CommandLine;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple4;
import org.gradoop.common.model.impl.id.GradoopId;
import org.gradoop.common.model.impl.id.GradoopIdList;
import org.gradoop.examples.AbstractRunner;
import org.gradoop.flink.io.impl.tlf.TLFDataSource;
import org.gradoop.flink.model.impl.operators.matching.transactional.TransactionalPatternMatching;
import org.gradoop.flink.model.impl.operators.matching.single.PatternMatching;
import org.gradoop.flink.model.impl.operators.matching.transactional.algorithm.DepthSearchMatching;
import org.gradoop.flink.model.impl.operators.matching.transactional.function.FindEmbeddings;
import org.gradoop.flink.model.impl.operators.matching.transactional.function.GraphTransactionMatcher;
import org.gradoop.flink.model.impl.operators.matching.transactional.function.HasEmbeddings;
import org.gradoop.flink.model.impl.operators.matching.transactional.tuples.GraphWithCandidates;
import org.gradoop.flink.util.GradoopFlinkConfig;
import java.util.concurrent.TimeUnit;
/**
 * Used to benchmark the {@link TransactionalPatternMatching} implementation.
 * <p>
 * The graph transactions are read from the path given by the {@code -i}
 * option using a {@link TLFDataSource}. Every transaction is then matched
 * against the query pattern given by the {@code -q} option.
 * <p>
 * Depending on the {@code -e} flag, the benchmark prints either the number of
 * embeddings that were found or the number of transactions for which a match
 * was reported, followed by the net runtime of the last executed job.
 */
public class TransactionalBenchmark extends AbstractRunner {
  /**
   * Option to declare path to input graph
   */
  private static final String OPTION_INPUT_PATH = "i";
  /**
   * Option to declare the query pattern
   */
  private static final String OPTION_QUERY = "q";
  /**
   * Option (flag) to compute and count embeddings instead of only counting
   * the transactions that have at least one embedding
   */
  private static final String OPTION_RETURN_EMBEDDINGS = "e";

  static {
    OPTIONS.addOption(OPTION_INPUT_PATH, "input", true, "Graph directory");
    OPTIONS.addOption(OPTION_QUERY, "query", true, "Pattern or fixed query");
    OPTIONS.addOption(OPTION_RETURN_EMBEDDINGS, "embeddings", false,
      "if embeddings should be returned");
  }

  /**
   * Given a query pattern, the benchmark matches the pattern against every
   * graph transaction of the input and prints a single count:
   * <ul>
   * <li>with {@code -e}: the number of embeddings found in all
   * transactions</li>
   * <li>without {@code -e}: the number of transactions for which a match
   * was reported</li>
   * </ul>
   * Afterwards the net runtime (in seconds) of the last executed job is
   * printed.
   * <p>
   * This benchmark currently supports structure only pattern. For semantic
   * patterns use {@link PatternMatching}.
   * <pre>
   * usage: org.gradoop.benchmark.patternmatching.TransactionalBenchmark
   *        [-i &lt;arg&gt;] [-q &lt;arg&gt;] [-e]
   *  -i,--input &lt;arg&gt;   Graph directory
   *  -q,--query &lt;arg&gt;   Pattern (e.g. "(a)--&gt;(b)")
   *  -e,--embeddings    if embeddings should be returned
   * </pre>
   *
   * @param args program arguments
   * @throws Exception if the execution of the dataflow program fails
   */
  public static void main(String[] args) throws Exception {
    CommandLine cmd =
      parseArguments(args, TransactionalBenchmark.class.getName());
    // NOTE(review): parseArguments is inherited; presumably it returns null
    // when only the help text was requested - nothing to run in that case
    if (cmd == null) {
      return;
    }
    performSanityCheck(cmd);

    String inputPath = cmd.getOptionValue(OPTION_INPUT_PATH);
    // the user supplied pattern is wrapped into a graph named "query"
    // before it is handed to the matching functions
    String queryString = "query[" + cmd.getOptionValue(OPTION_QUERY) + "]";
    boolean returnEmbeddings = cmd.hasOption(OPTION_RETURN_EMBEDDINGS);

    ExecutionEnvironment env = getExecutionEnvironment();
    TLFDataSource source =
      new TLFDataSource(inputPath, GradoopFlinkConfig.createConfig(env));

    // shared first step of both variants below
    DataSet<GraphWithCandidates> graphs =
      source.getGraphTransactions().getTransactions()
        .map(new GraphTransactionMatcher(queryString));

    if (returnEmbeddings) {
      DataSet<Tuple4<GradoopId, GradoopId, GradoopIdList, GradoopIdList>>
        embeddings = graphs.flatMap(
          new FindEmbeddings(new DepthSearchMatching(), queryString));

      // count() triggers the execution of the job
      long embeddingCount = embeddings.count();
      System.out.println("embeddingCount = " + embeddingCount);
    } else {
      // keep only those transactions whose match flag is true
      DataSet<Tuple2<GradoopId, Boolean>> containment =
        graphs.map(new HasEmbeddings(
          new DepthSearchMatching(), queryString))
          .filter(new SecondFieldTrue<>());

      // count() triggers the execution of the job
      long containmentCount = containment.count();
      System.out.println(
        "containmentCount = " + containmentCount);
    }

    System.out.println(String.format("Net runtime [s]: %d",
      env
        .getLastJobExecutionResult()
        .getNetRuntime(TimeUnit.SECONDS)));
  }

  /**
   * Checks if the minimum of arguments is provided, i.e. the input path and
   * the query pattern.
   *
   * @param cmd command line
   * @throws IllegalArgumentException if the input path or the query option
   *                                  is missing
   */
  private static void performSanityCheck(final CommandLine cmd) {
    if (!cmd.hasOption(OPTION_INPUT_PATH)) {
      throw new IllegalArgumentException("Define a graph input directory.");
    }
    if (!cmd.hasOption(OPTION_QUERY)) {
      // the message previously asked for an output directory although the
      // missing argument is the query pattern
      throw new IllegalArgumentException("Define a query pattern.");
    }
  }

  /**
   * Filter that checks if the second field is True.
   *
   * @param <T> any type
   */
  private static class SecondFieldTrue<T> implements
    FilterFunction<Tuple2<T, Boolean>> {

    /**
     * Keeps a tuple if and only if its second field is {@code true}.
     *
     * @param tuple2 tuple to check
     * @return value of the second field
     * @throws Exception declared by the implemented interface
     */
    @Override
    public boolean filter(Tuple2<T, Boolean> tuple2) throws Exception {
      return tuple2.f1;
    }
  }
}