/* * This file is part of Gradoop. * * Gradoop is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Gradoop is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Gradoop. If not, see <http://www.gnu.org/licenses/>. */ package org.gradoop.examples.patternmatching; import org.apache.commons.cli.CommandLine; import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.ProgramDescription; import org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint; import org.apache.hadoop.conf.Configuration; import org.gradoop.examples.AbstractRunner; import org.gradoop.flink.model.impl.GraphCollection; import org.gradoop.flink.model.impl.LogicalGraph; import org.gradoop.flink.model.impl.operators.matching.common.statistics.GraphStatistics; import org.gradoop.flink.model.impl.operators.matching.common.statistics.GraphStatisticsHDFSReader; import org.gradoop.flink.model.impl.operators.matching.common.statistics.GraphStatisticsLocalFSReader; import org.gradoop.flink.model.impl.operators.matching.single.PatternMatching; import org.gradoop.flink.model.impl.operators.matching.common.MatchStrategy; import org.gradoop.flink.model.impl.operators.matching.single.cypher.CypherPatternMatching; import org.gradoop.flink.model.impl.operators.matching.single.preserving.explorative.ExplorativePatternMatching; import org.gradoop.flink.model.impl.operators.matching.single.preserving.explorative.traverser.TraverserStrategy; import org.gradoop.flink.model.impl.operators.matching.single.simulation.dual.DualSimulation; import java.util.concurrent.TimeUnit; import static org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint.BROADCAST_HASH_FIRST; /** * This program can be used to run the different pattern matching engines implemented in Gradoop. */ public class PatternMatchingRunner extends AbstractRunner implements ProgramDescription { /** * Refers to {@link DualSimulation} using bulk iteration */ private static final String ALGO_DUAL_BULK = "dual-bulk"; /** * Refers to {@link DualSimulation} using delta iteration */ private static final String ALGO_DUAL_DELTA = "dual-delta"; /** * Refers to {@link ExplorativePatternMatching} */ private static final String ALGO_ISO_EXP = "iso-exp"; /** * Refers to {@link ExplorativePatternMatching} using * BROADCAST_HASH_FIRST as {@link JoinHint} for extending embeddings. */ private static final String ALGO_ISO_EXP_BC_HASH_FIRST = "iso-exp-bc-hf"; /** * Refers to {@link CypherPatternMatching}. */ private static final String ALGO_CYPHER = "cypher"; /** * Used for console output */ private static final String[] AVAILABLE_ALGORITHMS = new String[] { ALGO_DUAL_BULK, ALGO_DUAL_DELTA, ALGO_ISO_EXP, ALGO_ISO_EXP_BC_HASH_FIRST, ALGO_CYPHER }; /** * Option to declare path to input graph */ private static final String OPTION_INPUT_PATH = "i"; /** * Option to declare the format of the input path (csv,json) */ private static final String OPTION_INPUT_FORMAT = "f"; /** *Option to declare path to output graph */ private static final String OPTION_OUTPUT_PATH = "o"; /** * GDL/Cypher query string */ private static final String OPTION_QUERY_GRAPH = "q"; /** * Pattern Matching algorithm */ private static final String OPTION_ALGORITHM = "a"; /** * Attach original data true/false */ private static final String OPTION_ATTACH_DATA = "d"; /** * Option to declare path to graph statistics */ private static final String OPTION_STATISTICS = "s"; static { OPTIONS.addOption(OPTION_INPUT_PATH, "input-path", true, "Input graph directory"); OPTIONS.addOption(OPTION_INPUT_FORMAT, "input-format", true, "Format of the input graph [csv,json]"); OPTIONS.addOption(OPTION_OUTPUT_PATH, "output-path", true, "Output graph directory"); OPTIONS.addOption(OPTION_QUERY_GRAPH, "query", true, "GDL/Cypher based query graph"); OPTIONS.addOption(OPTION_ALGORITHM, "algorithm", true, String.format("Algorithm to execute the matching [%s]", StringUtils.join(AVAILABLE_ALGORITHMS, ','))); OPTIONS.addOption(OPTION_ATTACH_DATA, "attach-data", false, "Attach original vertex and edge data to the match graph"); OPTIONS.addOption(OPTION_STATISTICS, "statistics", true, "Path to graph statistics used for Cypher engine"); } /** * Runs the simulation using the given arguments. * <p> * -i, --input-path (path to data graph)<br /> * -f, --input-format (format of data graph [json,csv])<br /> * -o, --output-path (path to output directory)<br /> * -q, --query (GDL/Cypher query string)<br /> * -a, --algorithm [dual-bulk,dual-delta,iso-exp,iso-exp-bc-hf,cypher]<br /> * -d, --attach-data (default: false)<br /> * -s, --statistics (path to input graph statistics, used for cypher engine)<br /> * * @param args option line */ @SuppressWarnings("unchecked") public static void main(String[] args) throws Exception { CommandLine cmd = parseArguments(args, PatternMatchingRunner.class.getName()); if (cmd == null) { return; } performSanityCheck(cmd); String inputDir = cmd.getOptionValue(OPTION_INPUT_PATH); String inputFormat = cmd.getOptionValue(OPTION_INPUT_FORMAT).toLowerCase(); String outputDir = cmd.getOptionValue(OPTION_OUTPUT_PATH); String query = cmd.getOptionValue(OPTION_QUERY_GRAPH); String algorithm = cmd.getOptionValue(OPTION_ALGORITHM); boolean attachData = cmd.hasOption(OPTION_ATTACH_DATA); boolean hasStatistics = cmd.hasOption(OPTION_STATISTICS); GraphStatistics statistics = null; if (hasStatistics) { String statisticsPath = cmd.getOptionValue(OPTION_STATISTICS); if (statisticsPath.startsWith("hdfs://")) { statistics = GraphStatisticsHDFSReader.read(statisticsPath, new Configuration()); } else { statistics = GraphStatisticsLocalFSReader.read(statisticsPath); } } LogicalGraph epgmDatabase = readLogicalGraph(inputDir, inputFormat); GraphCollection result = execute(epgmDatabase, query, attachData, algorithm, statistics); writeGraphCollection(result, outputDir); System.out.println(String.format("Net runtime [s]: %d", getExecutionEnvironment() .getLastJobExecutionResult() .getNetRuntime(TimeUnit.SECONDS))); } /** * Checks if the minimum of arguments is provided * * @param cmd command line */ private static void performSanityCheck(final CommandLine cmd) { if (!cmd.hasOption(OPTION_INPUT_PATH)) { throw new IllegalArgumentException("Define a graph input directory."); } if (!cmd.hasOption(OPTION_INPUT_FORMAT)) { throw new IllegalArgumentException("Define an input format"); } if (!cmd.hasOption(OPTION_OUTPUT_PATH)) { throw new IllegalArgumentException("Define an graph output directory."); } if (!cmd.hasOption(OPTION_QUERY_GRAPH)) { throw new IllegalArgumentException("Define a graph query."); } if (!cmd.hasOption(OPTION_ALGORITHM)) { throw new IllegalArgumentException("Chose an algorithm."); } if (cmd.getOptionValue(OPTION_ALGORITHM).equals(ALGO_CYPHER)) { if (!cmd.hasOption(OPTION_STATISTICS)) { throw new IllegalArgumentException("Provide graph statistics when using Cypher engine"); } } } /** * Executes dual simulation on the given logical graph. * * @param databaseGraph data graph * @param query query graph * @param attachData attach vertex and edge data to the match graph * @param algorithm algorithm to use for pattern matching * @param statistics statistics about the input graph (needed for cypher) * @return matching subgraphs */ private static GraphCollection execute( LogicalGraph databaseGraph, String query, boolean attachData, String algorithm, GraphStatistics statistics) { PatternMatching op; switch (algorithm) { case ALGO_DUAL_BULK: op = new DualSimulation(query, attachData, true); break; case ALGO_DUAL_DELTA: op = new DualSimulation(query, attachData, false); break; case ALGO_ISO_EXP: op = new ExplorativePatternMatching.Builder() .setQuery(query) .setAttachData(attachData) .setMatchStrategy(MatchStrategy.ISOMORPHISM) .build(); break; case ALGO_ISO_EXP_BC_HASH_FIRST: op = new ExplorativePatternMatching.Builder() .setQuery(query) .setAttachData(attachData) .setMatchStrategy(MatchStrategy.ISOMORPHISM) .setTraverserStrategy(TraverserStrategy.SET_PAIR_BULK_ITERATION) .setEdgeStepJoinStrategy(BROADCAST_HASH_FIRST) .setVertexStepJoinStrategy(BROADCAST_HASH_FIRST) .build(); break; case ALGO_CYPHER: op = new CypherPatternMatching(query, attachData, MatchStrategy.ISOMORPHISM, MatchStrategy.ISOMORPHISM, statistics); break; default : throw new IllegalArgumentException(algorithm + " not supported"); } return op.execute(databaseGraph); } @Override public String getDescription() { return PatternMatchingRunner.class.getName(); } }