package lemming.lemma.cmd;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.List;
import lemming.lemma.LemmaCandidateGenerator;
import lemming.lemma.LemmaCandidateSet;
import lemming.lemma.LemmaInstance;
import lemming.lemma.edit.EditTreeGeneratorTrainer;
import lemming.lemma.edit.EditTreeGeneratorTrainer.EditTreeGeneratorTrainerOptions;
import marmot.morph.io.SentenceReader;
public class Stats {
/**
* @param args
*/
public static void main(String[] args) {
String train_file = args[0];
String dev_file = args[1];
int min_count = Integer.parseInt(args[2]);
List<LemmaInstance> training_instances = LemmaInstance.getInstances(new SentenceReader(train_file), -1);
List<LemmaInstance> dev_instances = LemmaInstance.getInstances(new SentenceReader(dev_file), -1);
String tag_independent = getStats(training_instances, dev_instances, false, min_count);
String tag_dependent = getStats(training_instances, dev_instances, true, min_count);
System.out.format("%s & %s \\\\\n", tag_independent, tag_dependent);
}
public static String getStats(List<LemmaInstance> training_instances,
List<LemmaInstance> dev_instances, boolean tag_dependent, int min_count) {
EditTreeGeneratorTrainer trainer = new EditTreeGeneratorTrainer();
trainer.getOptions().setOption(EditTreeGeneratorTrainerOptions.TAG_DEPENDENT, tag_dependent);
trainer.getOptions().setOption(EditTreeGeneratorTrainerOptions.MIN_COUNT, min_count);
LemmaCandidateGenerator generator = trainer.train(training_instances, null);
double num_token_candidates = 0;
double num_type_candidates = 0;
double num_tokens = 0;
double num_types = 0;
double correct_tokens = 0;
double correct_types = 0;
for (LemmaInstance instance : dev_instances) {
LemmaCandidateSet set = new LemmaCandidateSet();
generator.addCandidates(instance, set);
if (set.contains(instance.getLemma())) {
correct_tokens += instance.getCount();
correct_types += 1;
}
num_token_candidates += set.size() * instance.getCount();
num_type_candidates += set.size();
num_tokens += instance.getCount();
num_types+= 1.0;
}
return String.format("%s & %s\\%% & %s & %s\\%%", nice(num_token_candidates / num_tokens), nice(correct_tokens * 100. / num_tokens),
nice(num_type_candidates / num_types), nice(correct_types * 100. / num_types));
}
private static NumberFormat formatter = new DecimalFormat("#0.00");
private static String nice(double number) {
return formatter.format(number);
}
}