/* * This file is part of Gradoop. * * Gradoop is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Gradoop is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Gradoop. If not, see <http://www.gnu.org/licenses/>. */ package org.gradoop.examples.io; import org.apache.flink.api.common.ProgramDescription; import org.apache.flink.api.java.ExecutionEnvironment; import org.gradoop.examples.AbstractRunner; import org.gradoop.flink.io.impl.json.JSONDataSink; import org.gradoop.flink.io.impl.json.JSONDataSource; import org.gradoop.flink.model.impl.GraphCollection; import org.gradoop.flink.model.impl.LogicalGraph; import org.gradoop.flink.model.impl.operators.combination.ReduceCombination; import org.gradoop.flink.model.impl.operators.grouping.Grouping; import org.gradoop.flink.util.GradoopFlinkConfig; import java.util.Arrays; /** * Example program that reads a graph from an EPGM-specific JSON representation * into a {@link GraphCollection}, does some computation and stores the * resulting {@link LogicalGraph} as JSON. * * In the JSON representation, an EPGM graph collection (or Logical Graph) is * stored in three (or two) separate files. Each line in those files contains * a valid JSON-document describing a single entity: * * Example graphHead (data attached to logical graphs): * * { * "id":"graph-uuid-1", * "data":{"interest":"Graphs","vertexCount":4}, * "meta":{"label":"Community"} * } * * Example vertex JSON document: * * { * "id":"vertex-uuid-1", * "data":{"gender":"m","city":"Dresden","name":"Dave","age":40}, * "meta":{"label":"Person","graphs":["graph-uuid-1"]} * } * * Example edge JSON document: * * { * "id":"edge-uuid-1", * "source":"14505ae1-5003-4458-b86b-d137daff6525", * "target":"ed8386ee-338a-4177-82c4-6c1080df0411", * "data":{}, * "meta":{"label":"friendOf","graphs":["graph-uuid-1"]} * } * * An example graph collection can be found under src/main/resources/data.json. * For further information, have a look at the {@link org.gradoop.flink.io.impl.json} * package. */ public class JSONExample extends AbstractRunner implements ProgramDescription { /** * Reads an EPGM graph collection from a directory that contains the separate * files. Files can be stored in local file system or HDFS. * * args[0]: path to graph head file * args[1]: path to vertex file * args[2]: path to edge file * args[3]: path to write output graph * * @param args program arguments */ public static void main(String[] args) throws Exception { if (args.length != 4) { throw new IllegalArgumentException( "provide graph/vertex/edge paths and output directory"); } final String graphHeadFile = args[0]; final String vertexFile = args[1]; final String edgeFile = args[2]; final String outputDir = args[3]; // init Flink execution environment ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // create default Gradoop config GradoopFlinkConfig config = GradoopFlinkConfig.createConfig(env); // create DataSource JSONDataSource dataSource = new JSONDataSource(graphHeadFile, vertexFile, edgeFile, config); // read graph collection from DataSource GraphCollection graphCollection = dataSource.getGraphCollection(); // do some analytics LogicalGraph schema = graphCollection .reduce(new ReduceCombination()) .groupBy(Arrays.asList(Grouping.LABEL_SYMBOL), Arrays.asList(Grouping.LABEL_SYMBOL)); // write resulting graph to DataSink schema.writeTo(new JSONDataSink( outputDir + "graphHeads.json", outputDir + "vertices.json", outputDir + "edges.json", config)); // execute program env.execute(); } @Override public String getDescription() { return "EPGM JSON IO Example"; } }