/*
* This file is part of Gradoop.
*
* Gradoop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Gradoop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Gradoop. If not, see <http://www.gnu.org/licenses/>.
*/
package org.gradoop.benchmark.grouping;
import com.google.common.collect.Lists;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.io.FileUtils;
import org.apache.flink.api.common.ProgramDescription;
import org.gradoop.examples.AbstractRunner;
import org.gradoop.flink.model.impl.LogicalGraph;
import org.gradoop.flink.model.impl.operators.grouping.Grouping;
import org.gradoop.flink.model.impl.operators.grouping.GroupingStrategy;
import org.gradoop.flink.model.impl.operators.grouping.functions.aggregation.CountAggregator;
import org.gradoop.flink.model.impl.operators.grouping.functions.aggregation.MaxAggregator;
import org.gradoop.flink.model.impl.operators.grouping.functions.aggregation.MinAggregator;
import org.gradoop.flink.model.impl.operators.grouping.functions.aggregation.PropertyValueAggregator;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
/**
 * A dedicated program for parametrized graph grouping benchmark.
 *
 * The program reads a logical graph from the input path, groups it according
 * to the given command line options, writes the grouped graph to the output
 * path and logs the used parameters together with the net runtime of the last
 * job to a CSV file.
 */
public class GroupingBenchmark extends AbstractRunner
  implements ProgramDescription {
  /**
   * Option to declare path to input graph
   */
  private static final String OPTION_INPUT_PATH = "i";
  /**
   * Option to declare path to output graph
   */
  private static final String OPTION_OUTPUT_PATH = "o";
  /**
   * Option to set the grouping strategy
   */
  private static final String OPTION_GROUPING_STRATEGY = "s";
  /**
   * Vertex grouping key option
   */
  private static final String OPTION_VERTEX_GROUPING_KEY = "vgk";
  /**
   * Edge grouping key option
   */
  private static final String OPTION_EDGE_GROUPING_KEY = "egk";
  /**
   * Use vertex label option
   */
  private static final String OPTION_USE_VERTEX_LABELS = "uvl";
  /**
   * Use edge label option
   */
  private static final String OPTION_USE_EDGE_LABELS = "uel";
  /**
   * Path to CSV log file
   */
  private static final String OPTION_CSV_PATH = "csv";
  /**
   * Used vertex aggregator functions (min, max, count, none)
   */
  private static final String OPTION_VERTEX_AGGREGATION_FUNCS = "vagg";
  /**
   * Used vertex aggregation keys
   */
  private static final String OPTION_VERTEX_AGGREGATION_KEYS = "vak";
  /**
   * Used vertex aggregation result keys
   */
  private static final String OPTION_VERTEX_AGGREGATION_RESULT_KEYS = "vark";
  /**
   * Used edge aggregator functions (min, max, count, none)
   */
  private static final String OPTION_EDGE_AGGREGATION_FUNCS = "eagg";
  /**
   * Used edge aggregation keys
   */
  private static final String OPTION_EDGE_AGGREGATION_KEYS = "eak";
  /**
   * Used edge aggregation result keys
   */
  private static final String OPTION_EDGE_AGGREGATION_RESULT_KEYS = "eark";
  /**
   * Grouping strategy
   */
  private static GroupingStrategy STRATEGY = GroupingStrategy.GROUP_REDUCE;
  /**
   * Used VertexKey for grouping
   */
  private static String VERTEX_GROUPING_KEYS;
  /**
   * Used EdgeKey for grouping
   */
  private static String EDGE_GROUPING_KEYS;
  /**
   * Used csv path
   */
  private static String CSV_PATH;
  /**
   * Used hdfs INPUT_PATH
   */
  private static String INPUT_PATH;
  /**
   * Used hdfs OUTPUT_PATH
   */
  private static String OUTPUT_PATH;
  /**
   * Used vertex aggregators
   */
  private static String VERTEX_AGGREGATORS;
  /**
   * Used vertex aggregator keys
   */
  private static String VERTEX_AGGREGATOR_KEYS;
  /**
   * Used vertex aggregator result keys
   */
  private static String VERTEX_AGGREGATOR_RESULT_KEYS;
  /**
   * Used edge aggregators
   */
  private static String EDGE_AGGREGATORS;
  /**
   * Used edge aggregator keys
   */
  private static String EDGE_AGGREGATOR_KEYS;
  /**
   * Used edge aggregator result keys
   */
  private static String EDGE_AGGREGATOR_RESULT_KEYS;
  /**
   * Uses VertexLabels
   */
  private static boolean USE_VERTEX_LABELS;
  /**
   * Uses EdgeLabels
   */
  private static boolean USE_EDGE_LABELS;
  /**
   * Token separator for input strings
   */
  private static final Pattern TOKEN_SEPARATOR = Pattern.compile(",");
  /**
   * Matches a single whitespace character; used to strip whitespace from
   * token lists before they are split.
   */
  private static final Pattern WHITESPACE = Pattern.compile("\\s");
  /**
   * Character encoding used for every write to the CSV file
   */
  private static final String CSV_ENCODING = "UTF-8";

  static {
    OPTIONS.addOption(OPTION_INPUT_PATH, "vertex-input-path", true,
      "Path to vertex file");
    OPTIONS.addOption(OPTION_OUTPUT_PATH, "output-path", true,
      "Path to write output files to");
    OPTIONS.addOption(OPTION_GROUPING_STRATEGY, "strategy", true,
      "Grouping strategy (GR, GC)");
    OPTIONS.addOption(OPTION_USE_VERTEX_LABELS, "use-vertex-labels", false,
      "Group on vertex labels");
    OPTIONS.addOption(OPTION_USE_EDGE_LABELS, "use-edge-labels", false,
      "Group on edge labels");
    OPTIONS.addOption(OPTION_VERTEX_GROUPING_KEY, "vertex-grouping-key", true,
      "EPGMProperty key to group vertices on.");
    OPTIONS.addOption(OPTION_EDGE_GROUPING_KEY, "edge-grouping-key", true,
      "EPGMProperty key to group edges on.");
    OPTIONS.addOption(OPTION_CSV_PATH, "csv-path", true, "Path of the " +
      "generated CSV-File");
    OPTIONS.addOption(OPTION_VERTEX_AGGREGATION_FUNCS, "vertex-aggregator",
      true, "Applied aggregation function on vertices");
    OPTIONS.addOption(OPTION_VERTEX_AGGREGATION_KEYS,
      "vertex-aggregation-keys", true, "keys for vertex aggregation");
    OPTIONS.addOption(OPTION_VERTEX_AGGREGATION_RESULT_KEYS,
      "vertex-aggregation-result-keys", true, "keys for aggregation result");
    OPTIONS.addOption(OPTION_EDGE_AGGREGATION_FUNCS, "edge-aggregator", true,
      "Applied aggregation function on edges");
    OPTIONS.addOption(OPTION_EDGE_AGGREGATION_KEYS, "edge-aggregation-keys",
      true, "keys for edge aggregation");
    OPTIONS.addOption(OPTION_EDGE_AGGREGATION_RESULT_KEYS,
      "edge-aggregation-result-keys", true, "keys for aggregation result");
  }

  /**
   * Main program to run the benchmark. Arguments are the available options.
   *
   * @param args program arguments
   * @throws Exception if the benchmark run fails
   */
  public static void main(String[] args) throws Exception {
    CommandLine cmd = parseArguments(args,
      GroupingBenchmark.class.getName());
    if (cmd == null) {
      return;
    }
    // test if minimum arguments are set
    performSanityCheck(cmd);
    // read cmd arguments
    readCMDArguments(cmd);
    // initialize EPGM database
    LogicalGraph graphDatabase = readLogicalGraph(INPUT_PATH);
    // initialize grouping keys
    List<String> vertexKeys = Lists.newArrayList();
    if (VERTEX_GROUPING_KEYS != null) {
      vertexKeys = getKeys(VERTEX_GROUPING_KEYS);
    }
    List<String> edgeKeys = Lists.newArrayList();
    if (EDGE_GROUPING_KEYS != null) {
      edgeKeys = getKeys(EDGE_GROUPING_KEYS);
    }
    // initialize aggregators; they are only built if aggregation keys are set
    List<PropertyValueAggregator> vAggregators = Lists.newArrayList();
    List<PropertyValueAggregator> eAggregators = Lists.newArrayList();
    if (cmd.hasOption(OPTION_VERTEX_AGGREGATION_KEYS)) {
      vAggregators =
        getAggregators(VERTEX_AGGREGATORS, VERTEX_AGGREGATOR_KEYS,
          VERTEX_AGGREGATOR_RESULT_KEYS);
    }
    if (cmd.hasOption(OPTION_EDGE_AGGREGATION_KEYS)) {
      eAggregators = getAggregators(EDGE_AGGREGATORS, EDGE_AGGREGATOR_KEYS,
        EDGE_AGGREGATOR_RESULT_KEYS);
    }
    // build grouping operator
    Grouping grouping = getOperator(STRATEGY,
      vertexKeys, edgeKeys, USE_VERTEX_LABELS, USE_EDGE_LABELS, vAggregators,
      eAggregators);
    // call grouping on whole database graph
    LogicalGraph summarizedGraph = graphDatabase.callForGraph(grouping);
    if (summarizedGraph != null) {
      writeLogicalGraph(summarizedGraph, OUTPUT_PATH);
      writeCSV();
    } else {
      System.err.println("wrong parameter constellation");
    }
  }

  /**
   * Checks if the minimum of arguments is provided
   *
   * @param cmd command line
   * @throws IllegalArgumentException if a mandatory option is missing
   */
  private static void performSanityCheck(final CommandLine cmd) {
    if (!cmd.hasOption(OPTION_INPUT_PATH)) {
      throw new IllegalArgumentException("Define a graph input directory.");
    }
    // the grouped graph is always written to the output path, so fail early
    // instead of after the input has been read and the job has been set up
    if (!cmd.hasOption(OPTION_OUTPUT_PATH)) {
      throw new IllegalArgumentException("Define a graph output directory.");
    }
    if (!cmd.hasOption(OPTION_CSV_PATH)) {
      throw new IllegalArgumentException("Path to CSV-File need to be set");
    }
    if (!cmd.hasOption(OPTION_VERTEX_GROUPING_KEY) &&
      !cmd.hasOption(OPTION_USE_VERTEX_LABELS)) {
      throw new IllegalArgumentException(
        "Chose at least one vertex grouping key or use vertex labels.");
    }
    if (!cmd.hasOption(OPTION_VERTEX_AGGREGATION_FUNCS)) {
      throw new IllegalArgumentException("Vertex aggregator need to be set! " +
        "(max, min, count, none (or list of these)");
    }
    if (!cmd.hasOption(OPTION_EDGE_AGGREGATION_FUNCS)) {
      throw new IllegalArgumentException("Edge aggregator need to be set! " +
        "(max, min, count, none (or list of these)");
    }
  }

  /**
   * Reads the given arguments from command line and stores them in the static
   * configuration fields of this class.
   *
   * @param cmd command line
   */
  private static void readCMDArguments(final CommandLine cmd) {
    // read input output paths
    INPUT_PATH = cmd.getOptionValue(OPTION_INPUT_PATH);
    OUTPUT_PATH = cmd.getOptionValue(OPTION_OUTPUT_PATH);
    CSV_PATH = cmd.getOptionValue(OPTION_CSV_PATH);
    // initialize grouping strategy; any value other than "GC" keeps the
    // current strategy (GROUP_REDUCE by default)
    if (cmd.hasOption(OPTION_GROUPING_STRATEGY)) {
      String value = cmd.getOptionValue(OPTION_GROUPING_STRATEGY);
      // equalsIgnoreCase does not depend on the default locale
      if ("GC".equalsIgnoreCase(value)) {
        STRATEGY = GroupingStrategy.GROUP_COMBINE;
      }
    }
    // read vertex and edge grouping keys (null if the option is not set)
    VERTEX_GROUPING_KEYS = cmd.getOptionValue(OPTION_VERTEX_GROUPING_KEY);
    EDGE_GROUPING_KEYS = cmd.getOptionValue(OPTION_EDGE_GROUPING_KEY);
    // read vertex and edge labels
    USE_VERTEX_LABELS = cmd.hasOption(OPTION_USE_VERTEX_LABELS);
    USE_EDGE_LABELS = cmd.hasOption(OPTION_USE_EDGE_LABELS);
    // read aggregators
    VERTEX_AGGREGATORS = cmd.getOptionValue(OPTION_VERTEX_AGGREGATION_FUNCS);
    if (cmd.hasOption(OPTION_VERTEX_AGGREGATION_KEYS)) {
      VERTEX_AGGREGATOR_KEYS =
        cmd.getOptionValue(OPTION_VERTEX_AGGREGATION_KEYS);
      VERTEX_AGGREGATOR_RESULT_KEYS =
        cmd.getOptionValue(OPTION_VERTEX_AGGREGATION_RESULT_KEYS);
    }
    EDGE_AGGREGATORS = cmd.getOptionValue(OPTION_EDGE_AGGREGATION_FUNCS);
    if (cmd.hasOption(OPTION_EDGE_AGGREGATION_KEYS)) {
      EDGE_AGGREGATOR_KEYS = cmd.getOptionValue(OPTION_EDGE_AGGREGATION_KEYS);
      EDGE_AGGREGATOR_RESULT_KEYS =
        cmd.getOptionValue(OPTION_EDGE_AGGREGATION_RESULT_KEYS);
    }
  }

  /**
   * Method to get keys as list
   *
   * @param keys comma separated keys; whitespace is ignored
   * @return keys as list
   */
  private static List<String> getKeys(String keys) {
    return splitTokens(keys);
  }

  /**
   * Removes all whitespace from the given string and splits the remainder at
   * the token separator.
   *
   * @param value comma separated tokens, may be {@code null}
   * @return tokens in input order, empty list if value is {@code null}
   */
  private static List<String> splitTokens(String value) {
    if (value == null) {
      return Lists.newArrayList();
    }
    // String#replace would treat "\\s" as a literal, hence the regex matcher
    String stripped = WHITESPACE.matcher(value).replaceAll("");
    return Arrays.asList(TOKEN_SEPARATOR.split(stripped));
  }

  /**
   * Method to build aggregators
   *
   * The i-th aggregator name is combined with the i-th key and the i-th
   * result key. "count" does not use a key. Every name other than "count",
   * "max" and "min" (e.g. "none") creates no aggregator.
   *
   * @param aggs        aggregators as whole string
   * @param keys        aggregator keys as whole string
   * @param resultKeys  aggregator result keys as whole string
   * @return List of PropertyValueAggregators
   * @throws IllegalArgumentException if a "max" or "min" aggregator has no
   *                                  key or result key at its position
   */
  private static List<PropertyValueAggregator> getAggregators(String
    aggs, String keys, String resultKeys) {
    List<PropertyValueAggregator> aggregatorList = Lists.newArrayList();
    List<String> aggsList = splitTokens(aggs);
    List<String> keyList = splitTokens(keys);
    List<String> resultKeyList = splitTokens(resultKeys);
    for (int i = 0; i < aggsList.size(); i++) {
      String aggregator = aggsList.get(i);
      switch (aggregator) {
      case "count" :
        aggregatorList.add(new CountAggregator());
        break;
      case "max" :
        aggregatorList.add(new MaxAggregator(
          tokenAt(keyList, i, aggregator, "aggregation key"),
          tokenAt(resultKeyList, i, aggregator, "aggregation result key")));
        break;
      case "min" :
        aggregatorList.add(new MinAggregator(
          tokenAt(keyList, i, aggregator, "aggregation key"),
          tokenAt(resultKeyList, i, aggregator, "aggregation result key")));
        break;
      default:
        // "none" or an unknown name: no aggregator for this position
        break;
      }
    }
    return aggregatorList;
  }

  /**
   * Returns the token at the given position or fails with a message naming
   * the missing argument.
   *
   * @param tokens      token list
   * @param index       position of the requested token
   * @param aggregator  aggregator name the token belongs to
   * @param description description of the token used in the error message
   * @return token at the given position
   * @throws IllegalArgumentException if there is no token at the position
   */
  private static String tokenAt(List<String> tokens, int index,
    String aggregator, String description) {
    if (index >= tokens.size()) {
      throw new IllegalArgumentException(String.format(
        "Aggregator '%s' at position %d requires an %s.",
        aggregator, index + 1, description));
    }
    return tokens.get(index);
  }

  /**
   * Returns the grouping operator implementation based on the given strategy.
   *
   * @param strategy        grouping strategy to use
   * @param vertexKeys      vertex property keys used for grouping
   * @param edgeKeys        edge property keys used for grouping
   * @param useVertexLabels use vertex label for grouping, true/false
   * @param useEdgeLabels   use edge label for grouping, true/false
   * @param vAggs           used vertex aggregators
   * @param eAggs           used edge aggregators
   * @return grouping operator implementation
   */
  private static Grouping getOperator(GroupingStrategy strategy,
    List<String> vertexKeys, List<String> edgeKeys,
    boolean useVertexLabels, boolean useEdgeLabels,
    List<PropertyValueAggregator> vAggs, List<PropertyValueAggregator> eAggs) {
    Grouping.GroupingBuilder builder =
      new Grouping.GroupingBuilder()
        .setStrategy(strategy)
        .useVertexLabel(useVertexLabels)
        .useEdgeLabel(useEdgeLabels);
    for (PropertyValueAggregator agg : vAggs) {
      builder.addVertexAggregator(agg);
    }
    for (PropertyValueAggregator agg : eAggs) {
      builder.addEdgeAggregator(agg);
    }
    for (String vKey : vertexKeys) {
      builder.addVertexGroupingKey(vKey);
    }
    for (String eKey : edgeKeys) {
      builder.addEdgeGroupingKey(eKey);
    }
    return builder.build();
  }

  /**
   * Method to create and add lines to a csv-file
   *
   * If the CSV file already exists, only the result line is appended.
   * Otherwise the file is created and starts with the header line.
   *
   * @throws IOException if the CSV file cannot be written
   */
  private static void writeCSV() throws IOException {
    String head = String.format("%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s%n",
      "Parallelism", "dataset", "vertexKeys", "edgeKeys", "USE_VERTEX_LABELS",
      "USE_EDGE_LABELS", "Vertex Aggregators", "Vertex-Aggregator-Keys",
      "EPGMEdge-Aggregators", "EPGMEdge-Aggregator-Keys", "Runtime(s)");
    String tail = String.format("%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s%n",
      getExecutionEnvironment().getParallelism(), INPUT_PATH,
      VERTEX_GROUPING_KEYS, EDGE_GROUPING_KEYS, USE_VERTEX_LABELS,
      USE_EDGE_LABELS, VERTEX_AGGREGATORS, VERTEX_AGGREGATOR_KEYS,
      EDGE_AGGREGATORS, EDGE_AGGREGATOR_KEYS,
      getExecutionEnvironment().getLastJobExecutionResult()
        .getNetRuntime(TimeUnit.SECONDS));
    File f = new File(CSV_PATH);
    if (f.exists() && !f.isDirectory()) {
      // append with the same encoding that was used to create the file
      FileUtils.writeStringToFile(f, tail, CSV_ENCODING, true);
    } else {
      try (PrintWriter writer = new PrintWriter(CSV_PATH, CSV_ENCODING)) {
        writer.print(head);
        writer.print(tail);
        // PrintWriter does not throw on write errors, so ask explicitly
        if (writer.checkError()) {
          throw new IOException("Could not write CSV file: " + CSV_PATH);
        }
      }
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String getDescription() {
    return GroupingBenchmark.class.getName();
  }
}