/* * This file is part of Gradoop. * * Gradoop is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Gradoop is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Gradoop. If not, see <http://www.gnu.org/licenses/>. */ package org.gradoop.flink.io.impl.csv; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.tuple.Tuple2; import org.gradoop.common.model.impl.pojo.Edge; import org.gradoop.common.model.impl.pojo.Vertex; import org.gradoop.flink.io.api.DataSource; import org.gradoop.flink.io.impl.csv.functions.CSVEdgeToEdge; import org.gradoop.flink.io.impl.csv.functions.CSVLineToVertex; import org.gradoop.flink.model.impl.GraphCollection; import org.gradoop.flink.model.impl.GraphTransactions; import org.gradoop.flink.model.impl.LogicalGraph; import org.gradoop.flink.util.GradoopFlinkConfig; /** * A graph data source for CSV files. */ public class CSVDataSource extends CSVBase implements DataSource { /** * Creates a new CSV data source. * * @param csvPath path to the directory containing the CSV files * @param config Gradoop Flink configuration */ public CSVDataSource(String csvPath, GradoopFlinkConfig config) { super(csvPath, config); } @Override public LogicalGraph getLogicalGraph() { DataSet<Tuple2<String, String>> metaData = readMetaData(getMetaDataPath()); DataSet<Vertex> vertices = getConfig().getExecutionEnvironment() .readTextFile(getVertexCSVPath()) .map(new CSVLineToVertex(getConfig().getVertexFactory())) .withBroadcastSet(metaData, BC_METADATA); DataSet<Edge> edges = getConfig().getExecutionEnvironment() .readTextFile(getEdgeCSVPath()) .map(new CSVEdgeToEdge(getConfig().getEdgeFactory())) .withBroadcastSet(metaData, BC_METADATA); return LogicalGraph.fromDataSets(vertices, edges, getConfig()); } @Override public GraphCollection getGraphCollection() { return GraphCollection.fromGraph(getLogicalGraph()); } @Override public GraphTransactions getGraphTransactions() { return getGraphCollection().toTransactions(); } }