/*
* This file is part of Gradoop.
*
* Gradoop is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Gradoop is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Gradoop. If not, see <http://www.gnu.org/licenses/>.
*/
package org.gradoop.flink.io.impl.tlf;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.core.fs.FileSystem;
import org.gradoop.flink.io.api.DataSink;
import org.gradoop.flink.io.impl.tlf.constants.BroadcastNames;
import org.gradoop.flink.io.impl.tlf.functions.EdgeLabelList;
import org.gradoop.flink.io.impl.tlf.functions.ElementLabelEncoder;
import org.gradoop.flink.io.impl.tlf.functions.TLFDictionaryFileFormat;
import org.gradoop.flink.io.impl.tlf.functions.TLFFileFormat;
import org.gradoop.flink.io.impl.tlf.functions.VertexLabelList;
import org.gradoop.flink.model.impl.GraphTransactions;
import org.gradoop.flink.model.impl.LogicalGraph;
import org.gradoop.flink.representation.transactional.GraphTransaction;
import org.gradoop.flink.util.GradoopFlinkConfig;
import org.gradoop.flink.io.impl.tlf.functions.TLFDictionaryMapGroupReducer;
import org.gradoop.flink.model.impl.GraphCollection;
import java.io.IOException;
import java.util.Map;
/**
 * Data sink which stores an EPGM representation in a single TLF file. See
 * {@link TLFFileFormat} for a description of the format.
 */
public class TLFDataSink extends TLFBase implements DataSink {

  /**
   * Creates a new data sink and hands empty strings to the base class as
   * vertex and edge dictionary paths. Paths can be local (file://) or
   * HDFS (hdfs://).
   *
   * @param tlfPath tlf data file
   * @param config  Gradoop Flink configuration
   */
  public TLFDataSink(String tlfPath, GradoopFlinkConfig config) {
    super(tlfPath, "", "", config);
  }

  /**
   * Creates a new data sink with explicit dictionary files. Paths can be
   * local (file://) or HDFS (hdfs://).
   *
   * @param tlfPath                 tlf data file
   * @param tlfVertexDictionaryPath tlf vertex dictionary file
   * @param tlfEdgeDictionaryPath   tlf edge dictionary file
   * @param config                  Gradoop Flink configuration
   */
  public TLFDataSink(String tlfPath, String tlfVertexDictionaryPath,
    String tlfEdgeDictionaryPath, GradoopFlinkConfig config) {
    super(tlfPath, tlfVertexDictionaryPath, tlfEdgeDictionaryPath, config);
  }

  /**
   * Writes a logical graph; delegates to the two-argument variant with
   * {@code overWrite = false}.
   *
   * @param logicalGraph graph to write
   * @throws IOException declared by the delegate
   */
  @Override
  public void write(LogicalGraph logicalGraph) throws IOException {
    write(logicalGraph, false);
  }

  /**
   * Writes a graph collection; delegates to the two-argument variant with
   * {@code overWrite = false}.
   *
   * @param graphCollection collection to write
   * @throws IOException declared by the delegate
   */
  @Override
  public void write(GraphCollection graphCollection) throws IOException {
    write(graphCollection, false);
  }

  /**
   * Writes graph transactions; delegates to the two-argument variant with
   * {@code overWrite = false}.
   *
   * @param graphTransactions transactions to write
   * @throws IOException declared by the delegate
   */
  @Override
  public void write(GraphTransactions graphTransactions) throws IOException {
    write(graphTransactions, false);
  }

  /**
   * Wraps the logical graph in a collection, converts that collection to
   * transactions and writes them.
   *
   * @param logicalGraph graph to write
   * @param overWrite    true, if existing files should be overwritten
   * @throws IOException declared by the delegate
   */
  @Override
  public void write(LogicalGraph logicalGraph, boolean overWrite) throws IOException {
    GraphCollection singleGraphCollection = GraphCollection.fromGraph(logicalGraph);
    write(singleGraphCollection.toTransactions(), overWrite);
  }

  /**
   * Converts the graph collection to transactions and writes them.
   *
   * @param graphCollection collection to write
   * @param overWrite       true, if existing files should be overwritten
   * @throws IOException declared by the delegate
   */
  @Override
  public void write(GraphCollection graphCollection, boolean overWrite) throws IOException {
    GraphTransactions asTransactions = graphCollection.toTransactions();
    write(asTransactions, overWrite);
  }

  /**
   * Writes the graph transactions to the TLF path. For each dictionary this
   * sink reports as present, a label dictionary is derived from the
   * transactions, written to its dictionary path and broadcast to the label
   * encoder which is applied before the transactions are written.
   *
   * @param graphTransactions transactions to write
   * @param overWrite         true, if existing files should be overwritten
   * @throws IOException declared by the {@link DataSink} contract
   */
  @Override
  public void write(GraphTransactions graphTransactions, boolean overWrite) throws IOException {
    FileSystem.WriteMode mode;
    if (overWrite) {
      mode = FileSystem.WriteMode.OVERWRITE;
    } else {
      mode = FileSystem.WriteMode.NO_OVERWRITE;
    }

    // the dictionary flags are read once and reused below
    final boolean useVertexDictionary = hasVertexDictionary();
    final boolean useEdgeDictionary = hasEdgeDictionary();

    DataSet<Map<String, Integer>> vertexLabels = null;
    DataSet<Map<String, Integer>> edgeLabels = null;

    if (useVertexDictionary) {
      // collect the vertex labels of all transactions and merge the distinct
      // ones into a single dictionary
      vertexLabels = graphTransactions.getTransactions()
        .flatMap(new VertexLabelList())
        .distinct()
        .reduceGroup(new TLFDictionaryMapGroupReducer());
      // store the vertex dictionary
      vertexLabels.writeAsFormattedText(
        getTLFVertexDictionaryPath(), mode, new TLFDictionaryFileFormat());
    }

    if (useEdgeDictionary) {
      // collect the edge labels of all transactions and merge the distinct
      // ones into a single dictionary
      edgeLabels = graphTransactions.getTransactions()
        .flatMap(new EdgeLabelList())
        .distinct()
        .reduceGroup(new TLFDictionaryMapGroupReducer());
      // store the edge dictionary
      edgeLabels.writeAsFormattedText(
        getTLFEdgeDictionaryPath(), mode, new TLFDictionaryFileFormat());
    }

    // without any dictionary the transactions are written as they are
    if (!useVertexDictionary && !useEdgeDictionary) {
      graphTransactions.getTransactions()
        .writeAsFormattedText(getTLFPath(), mode, new TLFFileFormat());
      return;
    }

    // at least one dictionary is in use from here on; only the dictionaries
    // that were actually built are attached as broadcast sets
    DataSet<GraphTransaction> encodedTransactions;
    if (!useEdgeDictionary) {
      // vertex dictionary only
      encodedTransactions = graphTransactions.getTransactions()
        .map(new ElementLabelEncoder(useVertexDictionary, useEdgeDictionary))
        .withBroadcastSet(vertexLabels, BroadcastNames.VERTEX_DICTIONARY);
    } else if (!useVertexDictionary) {
      // edge dictionary only
      encodedTransactions = graphTransactions.getTransactions()
        .map(new ElementLabelEncoder(useVertexDictionary, useEdgeDictionary))
        .withBroadcastSet(edgeLabels, BroadcastNames.EDGE_DICTIONARY);
    } else {
      // both dictionaries
      encodedTransactions = graphTransactions.getTransactions()
        .map(new ElementLabelEncoder(useVertexDictionary, useEdgeDictionary))
        .withBroadcastSet(vertexLabels, BroadcastNames.VERTEX_DICTIONARY)
        .withBroadcastSet(edgeLabels, BroadcastNames.EDGE_DICTIONARY);
    }

    // write the label-encoded transactions in TLF format
    encodedTransactions.writeAsFormattedText(getTLFPath(), mode, new TLFFileFormat());
  }
}