package com.formulasearchengine.mathosphere.mlp.contracts;
import com.formulasearchengine.mathosphere.mlp.cli.FlinkMlpCommandConfig;
import com.formulasearchengine.mathosphere.mlp.pojos.ParsedWikiDocument;
import com.formulasearchengine.mathosphere.mlp.pojos.RawWikiDocument;
import org.junit.Test;
import java.util.List;
/**
 * Smoke test that pushes a sample wiki dump through a {@link TextAnnotatorMapper}
 * configured with {@code --language ru}.
 *
 * <p>The test makes no assertions on the parsed output; it passes as long as
 * no exception is thrown while the documents are read and mapped.
 */
public class RussianWikiPipelineTest {

  /** Classpath-relative location of the sample dump fed to the pipeline. */
  private static final String SAMPLE_RESOURCE =
      "com/formulasearchengine/mathosphere/mlp/wikirusample.xml";

  /**
   * Maps every document of the sample dump with the annotator and prints each
   * result to standard output.
   *
   * @throws Exception if building the annotator, reading the sample, or mapping
   *     a document fails
   */
  @Test
  public void fullPipeline() throws Exception {
    // The annotator is created (and opened) before the sample is read.
    TextAnnotatorMapper annotator = ruTextAnnotator();
    List<RawWikiDocument> rawDocuments = readDocs(SAMPLE_RESOURCE);

    for (RawWikiDocument rawDocument : rawDocuments) {
      ParsedWikiDocument annotated = annotator.map(rawDocument);
      System.out.println(annotated);
    }
  }

  /**
   * Loads the raw documents of the given resource by delegating to
   * {@link TextAnnotatorMapperTest#readWikiTextDocuments}.
   *
   * @param wikiRuFile resource path handed unchanged to the delegate
   * @return the documents returned by the delegate
   * @throws Exception if the delegate fails
   */
  private List<RawWikiDocument> readDocs(String wikiRuFile) throws Exception {
    List<RawWikiDocument> loaded = TextAnnotatorMapperTest.readWikiTextDocuments(wikiRuFile);
    return loaded;
  }

  /**
   * Builds a {@link TextAnnotatorMapper} from the command-line style arguments
   * {@code --language ru -pos ""} and opens it.
   *
   * @return the opened annotator
   * @throws Exception if the configuration cannot be built or opening fails
   */
  private static TextAnnotatorMapper ruTextAnnotator() throws Exception {
    // NOTE(review): the empty "-pos" value presumably means "no POS model path" - confirm.
    String[] cliArgs = {"--language", "ru", "-pos", ""};
    TextAnnotatorMapper annotator = new TextAnnotatorMapper(FlinkMlpCommandConfig.from(cliArgs));
    // open is invoked with null, exactly as this test has always done.
    annotator.open(null);
    return annotator;
  }
}