// Copyright 2013 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.
package marmot.test.morph.mapper.czech;
import java.util.Map;
import marmot.morph.mapper.czech.ConllReader;
import marmot.morph.mapper.czech.MsdReader;
import marmot.util.Counter;
import org.junit.Test;
/**
 * Consistency report between the tag dictionary read by {@link MsdReader}
 * and the one read by {@link ConllReader}.
 *
 * <p>Both dictionaries are loaded from absolute, site-specific file paths, so
 * the test can only run where those corpora are available. It makes no JUnit
 * assertions: mismatches and the final ratio are only written to
 * {@code System.err}.
 */
public class PdtMsdMapperTest {

    /**
     * Walks every key of the MSD dictionary that also occurs in the CoNLL
     * dictionary, and counts how often the first sorted tag of each side
     * disagrees.
     *
     * <p>A key is only considered when both counters have a total count above
     * 5, at least one of the two counters holds exactly one entry, both tags
     * start with the same character, and the tags do not both start with
     * {@code "r"}.
     */
    @Test
    public void test() {
        String msdPath = "/nfs/data1/proj/marmot/treebanks/mteV4-2010-05-14/ana/oana-cs.txt";
        Map<String, Counter<String>> msdDict = MsdReader.getDict(msdPath);

        String ptbPath = "/mounts/data/proj/marmot/treebanks/conll09/cze/CoNLL2009-ST-Czech-train.txt";
        Map<String, Counter<String>> ptbDict = ConllReader.getDict(ptbPath);

        int compared = 0;
        int mismatches = 0;

        for (Map.Entry<String, Counter<String>> msdEntry : msdDict.entrySet()) {
            // presumably the key is a word form -- TODO confirm against the readers
            String key = msdEntry.getKey();
            Counter<String> msdTags = msdEntry.getValue();
            Counter<String> ptbTags = ptbDict.get(key);

            // Keys missing from the CoNLL dictionary cannot be compared.
            if (ptbTags == null) {
                continue;
            }

            // Both sides must have been seen more than five times in total.
            double msdTotal = msdTags.totalCount();
            double ptbTotal = ptbTags.totalCount();
            boolean frequentInBoth = msdTotal > 5 && ptbTotal > 5;
            if (!frequentInBoth) {
                continue;
            }

            // At least one of the two counters must hold a single entry.
            boolean singleEntrySide = msdTags.size() == 1 || ptbTags.size() == 1;
            if (!singleEntrySide) {
                continue;
            }

            String msdTag = firstSortedKey(msdTags);
            String ptbTag = firstSortedKey(ptbTags);

            // Only tags that share their first character are compared.
            if (msdTag.charAt(0) != ptbTag.charAt(0)) {
                continue;
            }

            // Pairs in which both tags start with "r" are left out of the statistics.
            if (msdTag.startsWith("r") && ptbTag.startsWith("r")) {
                continue;
            }

            if (!msdTag.equals(ptbTag)) {
                System.err.println(key + " " + msdTag + " " + ptbTag);
                mismatches++;
            }
            compared++;
        }

        System.err.println(mismatches + " / " + compared);
    }

    /**
     * Returns the key of the first element yielded by
     * {@code counter.sortedEntries()}.
     *
     * <p>NOTE(review): presumably this is the most frequent tag; the sort
     * order is defined by {@code Counter} and is not visible here -- confirm.
     */
    private static String firstSortedKey(Counter<String> counter) {
        return counter.sortedEntries().iterator().next().getKey();
    }
}