package com.formulasearchengine.mathosphere.mlp.contracts;
import com.formulasearchengine.mathosphere.mlp.cli.MachineLearningDefinienExtractionConfig;
import com.formulasearchengine.mathosphere.mlp.pojos.EvaluationResult;
import com.formulasearchengine.mathosphere.mlp.pojos.Relation;
import com.formulasearchengine.mathosphere.mlp.pojos.WikiDocumentOutput;
import com.formulasearchengine.mathosphere.utils.GoldUtil;
import com.formulasearchengine.mlp.evaluation.Evaluator;
import com.formulasearchengine.mlp.evaluation.pojo.GoldEntry;
import com.formulasearchengine.mlp.evaluation.pojo.ScoreSummary;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.util.Collector;
import java.io.File;
import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Baseline scorer that treats all found candidates as equally good. Used to find out how many
 * definiens can be extracted at all.
 *
 * <p>Every relation whose identifier appears in the gold entry of its document is turned into
 * one CSV line; the distinct lines are sorted and handed to the {@link Evaluator}, and the
 * resulting {@link ScoreSummary} is printed to standard output.
 *
 * <p>NOTE(review): nothing is ever emitted to the {@link Collector} passed to
 * {@link #reduce(Iterable, Collector)}; the summary is only printed. Confirm this is intended.
 *
 * Created by Leo on 30.01.2017.
 */
public class StupidRelationScorer implements GroupReduceFunction<WikiDocumentOutput, EvaluationResult> {

  /** Configuration; this class only reads the gold file location from it. */
  private final MachineLearningDefinienExtractionConfig config;

  /**
   * @param config configuration providing the gold file path via {@code getGoldFile()}
   */
  public StupidRelationScorer(MachineLearningDefinienExtractionConfig config) {
    this.config = config;
  }

  /**
   * Evaluates all relations of the given documents against the gold standard and prints the
   * score summary to standard output.
   *
   * @param values the documents whose relations are evaluated
   * @param out    collector of the surrounding operator; not written to by this implementation
   * @throws Exception if reading the gold file or evaluating the extractions fails
   */
  @Override
  public void reduce(Iterable<WikiDocumentOutput> values, Collector<EvaluationResult> out) throws Exception {
    List<GoldEntry> gold = (new Evaluator()).readGoldEntries(new File(config.getGoldFile()));

    // A TreeSet both removes duplicate lines and keeps them in natural (lexicographic) order.
    Set<String> extractions = new TreeSet<>();

    for (WikiDocumentOutput value : values) {
      // Gold identifiers of this document; resolved lazily on the first relation so that
      // documents without relations never trigger a gold lookup.
      Set<String> goldIdentifiers = null;
      for (Relation relation : value.getRelations()) {
        if (goldIdentifiers == null) {
          goldIdentifiers = GoldUtil.getGoldEntryByTitle(gold, value.getTitle()).getDefinitions()
              .stream()
              .map(definition -> definition.getIdentifier())
              .collect(Collectors.toSet());
        }
        // Retain only relations whose identifier is part of the gold standard.
        if (goldIdentifiers.contains(relation.getIdentifier())) {
          extractions.add(toCsvLine(value, relation));
        }
      }
    }

    StringBuilder csv = new StringBuilder();
    for (String extraction : extractions) {
      csv.append(extraction).append("\n");
    }

    Evaluator evaluator = new Evaluator();
    StringReader reader = new StringReader(csv.toString());
    ScoreSummary s = evaluator.evaluate(evaluator.readExtractions(reader, gold, true), gold, true);
    System.out.println(s.toString());
  }

  /**
   * Formats one relation as a CSV line: {@code qId,"title","identifier","definition"}.
   * Whitespace characters in the title are replaced by underscores and the definition is
   * lower-cased with {@link Locale#ROOT} so the result does not depend on the JVM default locale.
   */
  private static String toCsvLine(WikiDocumentOutput value, Relation relation) {
    return value.getqId() + ","
        + "\"" + value.getTitle().replaceAll("\\s", "_") + "\","
        + "\"" + relation.getIdentifier() + "\","
        + "\"" + relation.getDefinition().toLowerCase(Locale.ROOT) + "\"";
  }
}