/* * This file is part of Gradoop. * * Gradoop is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Gradoop is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Gradoop. If not, see <http://www.gnu.org/licenses/>. */ package org.gradoop.examples.dimspan.data_source; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.ExecutionEnvironment; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.gradoop.flink.algorithms.fsm.dimspan.tuples.LabeledGraphStringString; import org.gradoop.flink.io.impl.tlf.inputformats.TLFInputFormat; import org.gradoop.flink.util.GradoopFlinkConfig; import java.io.IOException; /** * Lightweight data source for TLF formatted string-labeled graphs. * NOTE, no consistency check, inconsistent data will cause errors! */ public class DIMSpanTLFSource { /** * Gradoop configuration */ private final GradoopFlinkConfig config; /** * input file path */ private final String filePath; /** * Creates a new data source. Paths can be local (file://) or HDFS(hdfs://). * * @param filePath input file path * @param config Gradoop configuration */ public DIMSpanTLFSource(String filePath, GradoopFlinkConfig config) { if (config == null) { throw new IllegalArgumentException("config must not be null"); } if (filePath == null) { throw new IllegalArgumentException("vertex file must not be null"); } this.filePath = filePath; this.config = config; } /** * Reads the input as dataset of TLFGraphs. * * @return io graphs */ public DataSet<LabeledGraphStringString> getGraphs() throws IOException { ExecutionEnvironment env = getConfig().getExecutionEnvironment(); return env .readHadoopFile(new TLFInputFormat(), LongWritable.class, Text.class, getFilePath()) .map(new DIMSpanGraphFromText()); } // GETTERS AND SETTERS private GradoopFlinkConfig getConfig() { return config; } private String getFilePath() { return filePath; } }