package com.formulasearchengine.mathosphere.mlp; import com.formulasearchengine.mathosphere.mlp.cli.FlinkMlpCommandConfig; import com.formulasearchengine.mathosphere.mlp.contracts.JsonSerializerMapper; import com.formulasearchengine.mathosphere.mlp.contracts.PatternMatcherMapper; import com.formulasearchengine.mathosphere.mlp.contracts.TextAnnotatorMapper; import com.formulasearchengine.mathosphere.mlp.contracts.TextExtractorMapper; import com.formulasearchengine.mathosphere.mlp.pojos.ParsedWikiDocument; import com.formulasearchengine.mathosphere.mlp.pojos.WikiDocumentOutput; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.ExecutionEnvironment; import org.apache.flink.api.java.operators.DataSource; import org.apache.flink.core.fs.FileSystem.WriteMode; public class PatternMatchingRelationFinder { public static void main(String[] args) throws Exception { FlinkMlpCommandConfig config = FlinkMlpCommandConfig.test(); ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSource<String> source = readWikiDump(config, env); DataSet<ParsedWikiDocument> documents = source.flatMap(new TextExtractorMapper()) .map(new TextAnnotatorMapper(config)); DataSet<WikiDocumentOutput> relations = documents.map(new PatternMatcherMapper()); relations.map(new JsonSerializerMapper<>()) .writeAsText(config.getOutputDir(), WriteMode.OVERWRITE); env.execute("Pattern Matcher Relation Finder"); } public static DataSource<String> readWikiDump(FlinkMlpCommandConfig config, ExecutionEnvironment env) { return FlinkMlpRelationFinder.readWikiDump(config, env); } }