package com.formulasearchengine.mathosphere.mlp;
import com.google.common.collect.Multiset;
import com.formulasearchengine.mathosphere.mlp.cli.CliParams;
import com.formulasearchengine.mathosphere.mlp.cli.ListCommandConfig;
import com.formulasearchengine.mathosphere.mlp.cli.MlpCommandConfig;
import com.formulasearchengine.mathosphere.mlp.contracts.CreateCandidatesMapper;
import com.formulasearchengine.mathosphere.mlp.contracts.TextAnnotatorMapper;
import com.formulasearchengine.mathosphere.mlp.pojos.ParsedWikiDocument;
import com.formulasearchengine.mathosphere.mlp.pojos.RawWikiDocument;
import com.formulasearchengine.mathosphere.mlp.pojos.Relation;
import com.formulasearchengine.mathosphere.mlp.pojos.WikiDocumentOutput;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.List;
/**
 * Command-line helpers that run the annotation and candidate-creation mappers
 * over a single input file and print either the extracted relations (as CSV)
 * or the identifiers found in the document.
 */
public class RelationExtractor {

  /**
   * Runs the extraction with a fixed, hard-coded parameter set.
   *
   * <p>NOTE(review): {@code args} is ignored; the input path is hard-coded to a
   * local Windows path, so this entry point looks like a development aid.
   *
   * @param args command-line arguments (currently unused)
   * @throws Exception if the extraction or the output fails
   */
  public static void main(String[] args) throws Exception {
    String[] params = {"extract", "-in", "C:/tmp/mlp/linear_regression.txt"};
    MlpCommandConfig config = CliParams.from(params).getExtractCommandConfig();
    run(config);
  }

  /**
   * Processes the configured input file and writes one CSV record per relation:
   * identifier, definition and score.
   *
   * @param config configuration providing the input path and the optional output path
   * @throws Exception if reading, processing or writing fails
   */
  public static void run(MlpCommandConfig config) throws Exception {
    WikiDocumentOutput output = getWikiDocumentOutput(config);
    // try-with-resources closes a file-backed writer; a stdout-backed writer is
    // only flushed (see createPrinter).
    try (PrintWriter pw = createPrinter(config)) {
      List<Relation> relations = output.getRelations();
      CSVPrinter printer = CSVFormat.DEFAULT.withRecordSeparator("\n").print(pw);
      for (Relation r : relations) {
        String[] record = {r.getIdentifier(), r.getDefinition(), Double.toString(r.getScore())};
        printer.printRecord(record);
      }
      printer.flush();
      pw.flush();
    }
  }

  /**
   * Reads the configured input file as UTF-8, wraps it in a {@link RawWikiDocument},
   * and passes it through {@link TextAnnotatorMapper} and {@link CreateCandidatesMapper}.
   *
   * @param config configuration providing the input path; also handed to both mappers
   * @return the result of {@link CreateCandidatesMapper#map}
   * @throws Exception if the file cannot be read or a mapper fails
   */
  private static WikiDocumentOutput getWikiDocumentOutput(MlpCommandConfig config)
      throws Exception {
    TextAnnotatorMapper annotator = new TextAnnotatorMapper(config);
    annotator.open(null);
    String filePath = config.getInput();
    String text = FileUtils.readFileToString(new File(filePath), "UTF-8");
    RawWikiDocument doc = new RawWikiDocument(filePath, 0, text);
    ParsedWikiDocument parsedDocument = annotator.map(doc);
    CreateCandidatesMapper mlp = new CreateCandidatesMapper(config);
    return mlp.map(parsedDocument);
  }

  /**
   * Creates the writer that results are printed to.
   *
   * <p>If the configured output is not blank, the writer targets that file.
   * Otherwise it targets {@code System.out}; in that case {@code close()} only
   * flushes, so callers can always close the returned writer (for example with
   * try-with-resources) without shutting down the process-wide standard output.
   *
   * @param config configuration providing the optional output path
   * @return a writer for the output file, or a non-closing writer for standard output
   * @throws FileNotFoundException if the output file cannot be opened for writing
   */
  public static PrintWriter createPrinter(MlpCommandConfig config) throws FileNotFoundException {
    if (StringUtils.isNotBlank(config.getOutput())) {
      return new PrintWriter(new File(config.getOutput()));
    }
    return new PrintWriter(System.out) {
      @Override
      public void close() {
        // System.out is not owned by this writer: flush, but keep the stream open.
        flush();
      }
    };
  }

  /**
   * Processes the configured input file and prints each identifier entry's
   * element on its own line.
   *
   * <p>Failures are not propagated; the stack trace is printed instead.
   *
   * @param config configuration providing the input path and the optional output path
   */
  public static void list(ListCommandConfig config) {
    // The writer is opened before processing (as before) and is now always
    // released, including when processing throws.
    try (PrintWriter pw = createPrinter(config)) {
      WikiDocumentOutput output = getWikiDocumentOutput(config);
      for (Multiset.Entry<String> stringEntry : output.getIdentifiers()) {
        pw.println(stringEntry.getElement());
      }
      pw.flush();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
}