// Copyright 2013 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.

package marmot.morph;

import java.text.NumberFormat;
import java.util.Locale;

import marmot.core.Model;
import marmot.core.Result;
import marmot.core.Tagger;

/**
 * Accumulator for evaluation counters of a morphological tagging run.
 *
 * <p>All counters are plain public fields that callers fill in directly;
 * {@link #increment(MorphResult)} adds another result's counters onto this
 * one, and {@link #toString()} renders a human-readable report from them.
 *
 * <p>All numbers in the report are formatted with {@link Locale#ENGLISH} so
 * that the output does not depend on the JVM's default locale.
 */
public class MorphResult implements Result {

    private static final long serialVersionUID = 1L;

    /** Locale used for every formatted number in the report and on stderr. */
    private static final Locale REPORT_LOCALE = Locale.ENGLISH;

    // Sentence-level counters.
    public int num_sentences;
    public int sentence_errors;
    public int num_unreachable_sentences;

    /** Per-rank counters; sized by the model's beam size in the constructor. */
    public int[] rank;

    // toString() reports num_states / candidates_length as "Candidates / Position".
    public int num_states;
    public int candidates_length;

    // Out-of-vocabulary counters; oov_errors has one slot per level.
    public int num_oovs;
    public int[] oov_errors;
    public int morph_oov_errors;

    // Token counters; token_errors has one slot per level.
    public int[] token_errors;
    public int morph_errors;
    public int num_tokens;

    /** Total time; toString() treats it as milliseconds (divides by 1000 to print seconds). */
    public long time;

    /** Model whose category table provides the label for each level in the report. */
    private Model model_;

    /** Lattice time; toString() treats it as milliseconds, and reports time minus this as Viterbi time. */
    public long sum_lattice_time;

    // Lemma counters.
    public int lemma_errors;
    public int lemma_oov_errors;

    /**
     * Creates an empty result sized for the given tagger.
     *
     * @param tagger supplies the model and the number of levels
     */
    public MorphResult(Tagger tagger) {
        this(tagger.getModel(), tagger.getNumLevels());
    }

    /**
     * Creates an empty result.
     *
     * @param model model whose options give the beam size (length of {@link #rank})
     *              and whose category table labels the per-level report lines
     * @param level length of the per-level arrays {@link #oov_errors} and
     *              {@link #token_errors}
     */
    public MorphResult(Model model, int level) {
        rank = new int[model.getOptions().getBeamSize()];
        oov_errors = new int[level];
        token_errors = new int[level];
        model_ = model;
    }

    /**
     * Adds every counter of {@code eval} onto the corresponding counter of this
     * result. Array counters are added element by element; the arrays of both
     * results are expected to have equal lengths (checked only via
     * {@code assert}).
     *
     * @param eval the result to add; it is not modified
     */
    public void increment(MorphResult eval) {
        num_sentences += eval.num_sentences;
        sentence_errors += eval.sentence_errors;
        num_unreachable_sentences += eval.num_unreachable_sentences;

        morph_errors += eval.morph_errors;
        morph_oov_errors += eval.morph_oov_errors;

        num_states += eval.num_states;
        candidates_length += eval.candidates_length;

        num_oovs += eval.num_oovs;
        assert oov_errors.length == eval.oov_errors.length;
        for (int index = 0; index < eval.oov_errors.length; index++) {
            oov_errors[index] += eval.oov_errors[index];
        }

        num_tokens += eval.num_tokens;
        assert token_errors.length == eval.token_errors.length;
        for (int index = 0; index < eval.token_errors.length; index++) {
            token_errors[index] += eval.token_errors[index];
        }

        assert rank.length == eval.rank.length;
        for (int index = 0; index < rank.length; index++) {
            rank[index] += eval.rank[index];
        }

        sum_lattice_time += eval.sum_lattice_time;
        lemma_errors += eval.lemma_errors;
        lemma_oov_errors += eval.lemma_oov_errors;
        time += eval.time;
    }

    /**
     * Builds the evaluation report.
     *
     * <p>Note that this method has a side effect: when the recorded times exceed
     * their thresholds, throughput and timing lines are written to
     * {@code System.err} (they are not part of the returned string).
     *
     * @return the multi-line report
     */
    @Override
    public String toString() {
        NumberFormat percentFormat = NumberFormat.getInstance(REPORT_LOCALE);
        StringBuilder sb = new StringBuilder();

        sb.append("Eval\n");
        sb.append(String.format("Token : %s\n", toString(token_errors, num_tokens)));
        sb.append(String.format("all : %s\n\n", toString(morph_errors, num_tokens)));
        sb.append(String.format("lemma : %s\n\n", toString(lemma_errors, num_tokens)));

        sb.append(String.format("OOV : %s\n", toString(oov_errors, num_oovs)));
        sb.append(String.format("all : %s\n\n", toString(morph_oov_errors, num_oovs)));
        sb.append(String.format("lemma : %s\n\n", toString(lemma_oov_errors, num_oovs)));

        sb.append(String.format("Sentence : %s", toString(sentence_errors, num_sentences)));
        for (int i = 0; i < rank.length; i++) {
            sb.append(' ');
            sb.append(percentFormat.format(rank[i] * 100. / num_sentences));
            sb.append('%');
        }
        sb.append('\n');

        if (num_unreachable_sentences > 0) {
            // The helper prints (total - error), so passing the complement here
            // makes the line show the number of unreachable sentences.
            sb.append(String.format("Unreachable : %s\n",
                    toString(num_sentences - num_unreachable_sentences, num_sentences)));
        }

        if (candidates_length > 0) {
            sb.append(String.format("Candidates / Position : %s\n",
                    num_states / (double) candidates_length));
        }

        reportTimings();

        return sb.toString();
    }

    /** Writes throughput and timing lines to stderr when the recorded times exceed their thresholds. */
    private void reportTimings() {
        if (time > 1000) {
            System.err.format(REPORT_LOCALE, "Processed %d sentences at %g sentences/s\n",
                    num_sentences, num_sentences / (time / 1000.));
        }

        if (sum_lattice_time > 500) {
            System.err.format(REPORT_LOCALE, "Lattice time: %gs\n", (sum_lattice_time / 1000.));
        }

        if (time - sum_lattice_time > 500) {
            System.err.format(REPORT_LOCALE, "Viterbi time: %gs\n",
                    ((time - sum_lattice_time) / 1000.));
        }
    }

    /**
     * Renders one "label: correct / total = pct%" line per array slot, each
     * labelled with the model's category symbol for that index. The result
     * starts with a newline so the lines appear below the section heading.
     */
    private String toString(int[] error, int total) {
        StringBuilder sb = new StringBuilder("\n");
        for (int index = 0; index < error.length; index++) {
            String key = model_.getCategoryTable().toSymbol(index);
            sb.append(key);
            sb.append(": ");
            sb.append(toString(error[index], total));
            if (index < error.length - 1) {
                sb.append('\n');
            }
        }
        return sb.toString();
    }

    /**
     * Renders "correct / total = pct%", where correct is {@code total - error}.
     * The percentage is a floating-point division, so a zero total yields a
     * non-finite value rather than an exception.
     */
    private String toString(int error, int total) {
        int correct = total - error;
        return String.format(REPORT_LOCALE, "%d / %d = %g%%", correct, total,
                correct * 100. / total);
    }

    /**
     * @return {@code 100 - morph_errors * 100 / num_tokens}; not guarded against
     *         {@code num_tokens == 0}, in which case the result is not finite
     */
    public double getTokenAccuracy() {
        return 100. - (morph_errors * 100. / num_tokens);
    }

    /**
     * @return {@code 100 - morph_oov_errors * 100 / num_oovs}; not guarded against
     *         {@code num_oovs == 0}, in which case the result is not finite
     */
    public double getOovTokenAccuracy() {
        return 100. - (morph_oov_errors * 100. / num_oovs);
    }

    /** @return the token accuracy, see {@link #getTokenAccuracy()} */
    @Override
    public double getScore() {
        return getTokenAccuracy();
    }
}