// Copyright 2013 Thomas Müller // This file is part of MarMoT, which is licensed under GPLv3. package marmot.morph.mapper.czech; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import marmot.morph.mapper.czech.PdtMorphTag.Gender; import marmot.util.LineIterator; public class Mapping { Map<String, Map<String, Set<PdtMorphTag>>> map_ = new HashMap<String, Map<String, Set<PdtMorphTag>>>(); public void init_fst_map(String filename) { LineIterator iterator = new LineIterator(filename); map_ = new HashMap<String, Map<String, Set<PdtMorphTag>>>(); while (iterator.hasNext()) { List<String> line = iterator.next(); if (!line.isEmpty()) { String form = line.get(0); Map<String, Set<PdtMorphTag>> lemmas = map_.get(form); if (lemmas == null) { lemmas = new HashMap<String, Set<PdtMorphTag>>(); map_.put(form, lemmas); } String lemma = line.get(1); Set<PdtMorphTag> tags = lemmas.get(lemma); if (tags == null) { tags = new HashSet<PdtMorphTag>(); lemmas.put(lemma, tags); } String tag = line.get(2); String feat = line.get(3); PdtMorphTagReader reader = new PdtMorphTagReader(); PdtMorphTag pdt_tag = reader.parse_positional(tag + feat); // System.err.println(pdt_tag.toHumanMorphString().toUpperCase() + " " + tag + feat); tags.add(pdt_tag); } } } public void create_mapping(String mte_file) { LineIterator iter = new LineIterator(mte_file); MsdReader reader = new MsdReader(); PdTMsdMapper mapper = new PdTMsdMapper(); while (iter.hasNext()) { List<String> line = iter.next(); if (line.isEmpty()) { continue; } String form = line.get(0); Map<String, Set<PdtMorphTag>> map = map_.get(form); if (map != null) { String lemma = line.get(1); Set<PdtMorphTag> set = getLemmaSet(map, lemma); String msd_tag_string = line.get(2); MsdTag msd_tag = reader.parse(msd_tag_string); if (set.size() == 1) { PdtMorphTag pdt_tag = set.iterator().next(); MsdTag msd_tag_mapped = mapper.map(pdt_tag); //if (!msd_tag_mapped.toHumanString().equals(msd_tag.toHumanString())) { if (pdt_tag.gender_ == Gender.q) { //if (msd_tag.gender_ == MsdTag.Gender.f && msd_tag.tense_ == MsdTag.Tense.s && msd_tag.number_ == MsdTag.Number.s) { //if (msd_tag.toHumanString().equals(msd_tag_mapped.toHumanString())) { System.err.println(form + " " + pdt_tag.toHumanMorphString() + " " + msd_tag_mapped.toHumanString() + " " + msd_tag.toHumanString()); } //} //} } } } } private Set<PdtMorphTag> getLemmaSet(Map<String, Set<PdtMorphTag>> map, String lemma) { Set<PdtMorphTag> set = map.get(lemma); if (set != null) { return set; } set = new HashSet<PdtMorphTag>(); for (Set<PdtMorphTag> current_set : map.values()) { set.addAll(current_set); } return set; } public static void main(String[] args) { Mapping m = new Mapping(); m.init_fst_map(args[1]); m.create_mapping(args[0]); } }