package eu.project.ttc.eval.aligner;
import java.io.IOException;
import java.io.Writer;
import java.nio.file.Path;
import java.util.List;
import java.util.OptionalInt;
import java.util.stream.IntStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Stopwatch;
import eu.project.ttc.engines.BilingualAligner;
import eu.project.ttc.engines.BilingualAligner.RequiresSize2Exception;
import eu.project.ttc.engines.BilingualAligner.TranslationCandidate;
import eu.project.ttc.eval.AlignmentEvalRun;
import eu.project.ttc.eval.AlignmentEvalService;
import eu.project.ttc.eval.AlignmentRecord;
import eu.project.ttc.eval.ConfigListBuilder;
import eu.project.ttc.eval.Corpus;
import eu.project.ttc.eval.EvaluatedMethod;
import eu.project.ttc.eval.RunTrace;
import eu.project.ttc.eval.TermSuiteEvals;
import eu.project.ttc.eval.TerminoConfig;
import eu.project.ttc.models.Term;
import eu.project.ttc.models.TermIndex;
import eu.project.ttc.tools.TermSuiteAlignerBuilder;
/**
 * Command-line runner that evaluates the bilingual aligner.
 *
 * <p>For every language pair exposed by {@link AlignmentEvalService}, every
 * {@link EvaluatedMethod}, every generated {@link TerminoConfig} and every
 * {@link Corpus}, an {@link AlignmentEvalRun} is executed whenever a reference
 * file is available; its trace is saved and one result line is written.
 */
public class BilingualAlignementEvalRunner {

    private static final Logger LOGGER = LoggerFactory.getLogger(BilingualAlignementEvalRunner.class);

    private final AlignmentEvalService service;

    /** Creates a runner backed by a fresh {@link AlignmentEvalService}. */
    public BilingualAlignementEvalRunner() {
        this.service = new AlignmentEvalService();
    }

    /**
     * Program entry point: runs the whole evaluation.
     *
     * @param args ignored
     * @throws IOException declared by {@link #run()}
     */
    public static void main(String[] args) throws IOException {
        new BilingualAlignementEvalRunner().run();
    }

    /**
     * Iterates over all (language pair, method, config, corpus) combinations and
     * evaluates each one that has a reference file.
     *
     * <p>A missing dictionary only skips the current combination (logged as a
     * warning). An {@link IOException} aborts the current (language pair, method)
     * combination, is logged as an error, and the loop moves on to the next method.
     */
    private void run() throws IOException {
        LOGGER.info("Starting the bilingual aligner evaluation script");
        Stopwatch sw = Stopwatch.createStarted();
        service.langPairs().forEach(langPair -> {
            for (EvaluatedMethod evaluatedMethod : EvaluatedMethod.values()) {
                // One result writer per (language pair, method); closed automatically.
                try (Writer resultWriter = service.getResultWriter(langPair, evaluatedMethod)) {
                    for (TerminoConfig config : ConfigListBuilder.start().frequencies(1, 5).scopes(3).list()) {
                        for (Corpus corpus : Corpus.values()) {
                            try {
                                AlignmentEvalRun run = new AlignmentEvalRun(langPair, evaluatedMethod, corpus, config);
                                if (service.hasRef(run.getCorpus(), run.getLangPair())) {
                                    runEval(run);
                                    service.saveRunTrace(run);
                                    service.writeResultLine(resultWriter, run);
                                }
                            } catch (DictionaryNotFound e) {
                                // SLF4J substitutes "{}" placeholders, not printf-style "%s".
                                LOGGER.warn("Skipping evaluation because dictionary not found: {}", e.getPath());
                            }
                        }
                    }
                } catch (IOException e1) {
                    LOGGER.error("IO error during eval", e1);
                }
            }
        });
        sw.stop();
        LOGGER.info("Finished evaluation of bilingual aligner in {}", sw);
    }

    /**
     * Executes one evaluation run: builds an aligner from the source and target
     * terminologies and the bilingual dictionary, then records one
     * {@link AlignmentRecord} in the run's trace for every reference pair accepted
     * by the evaluated method.
     *
     * @param run the evaluation run to execute
     * @return the trace of {@code run}, populated with one record per evaluated pair
     * @throws IOException declared for the terminology / reference file access
     * @throws DictionaryNotFound if the dictionary path of the run's language pair
     *         is not an existing regular file
     */
    public RunTrace runEval(AlignmentEvalRun run) throws IOException, DictionaryNotFound {
        RunTrace trace = run.getTrace();
        LOGGER.info("Running evaluation {}", run);

        Path dicoPath = TermSuiteEvals.getDictionaryPath(run.getLangPair());
        if (!dicoPath.toFile().isFile()) {
            throw new DictionaryNotFound(dicoPath.toString());
        }

        TermIndex sourceTermino = TermSuiteEvals.getTerminology(run.getCorpus(), run.getLangPair().getSource(), run.getTerminoConfig());
        TermIndex targetTermino = TermSuiteEvals.getTerminology(run.getCorpus(), run.getLangPair().getTarget(), run.getTerminoConfig());

        BilingualAligner aligner = TermSuiteAlignerBuilder.start()
                .setSourceTerminology(sourceTermino)
                .setTargetTerminology(targetTermino)
                .setDicoPath(dicoPath.toString())
                .setDistanceCosine()
                .create();

        // Each pair is {source term, expected target term}.
        service.getRefFile(run)
                .pairs(sourceTermino, targetTermino)
                .filter(pair -> run.getEvaluatedMethod().acceptPair(aligner, pair[0], pair[1]))
                .forEach(pair -> evaluatePair(run, aligner, trace, pair[0], pair[1]));

        return trace;
    }

    /**
     * Aligns a single source term and records the outcome in {@code trace}.
     *
     * <p>The record is a success only when the expected term is the first
     * candidate. Otherwise it is a failure, annotated with the 1-based position of
     * the expected term when it appears further down the candidate list. When the
     * aligner throws {@link RequiresSize2Exception}, the record is flagged as not
     * valid.
     */
    private void evaluatePair(AlignmentEvalRun run, BilingualAligner aligner, RunTrace trace,
            Term sourceTerm, Term expectedTerm) {
        try {
            LOGGER.debug("Aligning source term <{}>. Expecting target term <{}>",
                    sourceTerm.getGroupingKey(),
                    expectedTerm.getGroupingKey());

            // NOTE(review): the meaning of the literals 100 and 1 is defined by
            // EvaluatedMethod.align, which is not visible here - confirm before changing.
            List<TranslationCandidate> targets = run.getEvaluatedMethod().align(aligner, sourceTerm, 100, 1);

            if (targets.isEmpty()) {
                trace.newTry(new AlignmentRecord(sourceTerm, expectedTerm)
                        .setValid(true)
                        .setSuccess(false)
                        .setComment("Empty candidate list"));
                LOGGER.debug("Candidate list returned by aligner is empty");
                return;
            }

            if (targets.get(0).getTerm().equals(expectedTerm)) {
                trace.newTry(new AlignmentRecord(sourceTerm, expectedTerm)
                        .setValid(true)
                        .setSuccess(true)
                        .setMethod(targets.get(0).getMethod())
                        .setComment("OK"));
                LOGGER.debug("SUCCESS");
                return;
            }

            OptionalInt index = IntStream.range(0, targets.size())
                    .filter(i -> targets.get(i).getTerm().equals(expectedTerm))
                    .findFirst();

            if (index.isPresent()) {
                // 1-based rank; computed once so that the comment, the stored position
                // and the log line all agree (a bare `"..." + i + 1` would append "1").
                int position = index.getAsInt() + 1;
                trace.newTry(new AlignmentRecord(sourceTerm, expectedTerm)
                        .setValid(true)
                        .setSuccess(false)
                        .setMethod(targets.get(index.getAsInt()).getMethod())
                        .setComment("Term found at position: " + position)
                        .setTargetTermCandidatePosition(position));
                LOGGER.debug("FAILED. Position of expected term: {}", position);
            } else {
                trace.newTry(new AlignmentRecord(sourceTerm, expectedTerm)
                        .setValid(true)
                        .setSuccess(false)
                        .setComment("Expected term not found in candidates"));
                LOGGER.debug("FAILED. Target term not found in candidates");
            }
        } catch (RequiresSize2Exception e) {
            trace.newTry(new AlignmentRecord(sourceTerm, expectedTerm)
                    .setValid(false)
                    .setComment("Source term has too many lemmas"));
            LOGGER.warn("Source term has too many lemmas <{}>. Alignment for such terms not yet implemented.", sourceTerm);
        }
    }
}