// Copyright 2013 Thomas Müller // This file is part of MarMoT, which is licensed under GPLv3. package marmot.morph.mapper.latin; import java.util.Collections; import java.util.HashSet; import java.util.Set; import marmot.morph.mapper.latin.ItMorphTag.CaseNumber; import marmot.morph.mapper.latin.ItMorphTag.FlexionalCategory; import marmot.morph.mapper.latin.ItMorphTag.NominalsDegree; import marmot.morph.mapper.latin.ItMorphTag.ParticipialsDegree; import marmot.morph.mapper.latin.LdtMorphTag.Pos; public class BrandoliniRules { static Set<String> f11_nouns; static Set<String> f11_pronouns; static Set<String> f11_adjectives; static Set<String> f11_numbers; static Set<String> f11_adverbs; static Set<String> o4_conj; static Set<String> o4_adverb; static Set<String> o4_adverb_deprels; static Set<String> o4_conj_deprels; static Set<String> pronoun_lemmas; static Set<String> pronouns; static Set<String> nouns; { nouns = new HashSet<String>(); nouns.add("aristoteles"); nouns.add("boetius"); nouns.add("plato"); nouns.add("augustinus"); nouns.add("dionysius"); nouns.add("avicenna"); pronouns = new HashSet<String>(); pronouns.add("alterius"); pronouns.add("unumquodque"); pronouns.add("quicquid"); pronouns.add("cuiuslibet"); pronouns.add("quaelibet"); pronouns.add("qualibet"); pronouns.add("uniuscuiusque"); pronouns.add("quolibet"); pronouns.add("quodlibet"); pronouns.add("unicuique"); pronouns.add("cuilibet"); pronouns.add("unaquaeque"); f11_nouns = new HashSet<String>(); f11_nouns.add("anima"); f11_nouns.add("animabus"); f11_nouns.add("animae"); f11_nouns.add("animam"); f11_nouns.add("animarum"); f11_nouns.add("animas"); f11_nouns.add("bovis"); f11_nouns.add("dei"); f11_nouns.add("deo"); f11_nouns.add("deum"); f11_nouns.add("deus"); f11_nouns.add("domibus"); f11_nouns.add("domo"); f11_nouns.add("domui"); f11_nouns.add("domum"); f11_nouns.add("domus"); f11_nouns.add("vi"); f11_nouns.add("vim"); f11_nouns.add("vires"); f11_nouns.add("viribus"); f11_nouns.add("vis"); f11_pronouns = new HashSet<String>(); f11_pronouns.add("aliqua"); f11_pronouns.add("aliquid"); f11_pronouns.add("aliquis"); f11_pronouns.add("aliud"); f11_pronouns.add("ego"); f11_pronouns.add("me"); f11_pronouns.add("mihi"); f11_pronouns.add("nobis"); f11_pronouns.add("nobiscum"); f11_pronouns.add("nos"); f11_pronouns.add("se"); f11_pronouns.add("secum"); f11_pronouns.add("seipsa"); f11_pronouns.add("seipsam"); f11_pronouns.add("seipsas"); f11_pronouns.add("seipsis"); f11_pronouns.add("seipso"); f11_pronouns.add("seipsum"); f11_pronouns.add("semetipsum"); f11_pronouns.add("sese"); f11_pronouns.add("sibi"); f11_pronouns.add("sui"); f11_pronouns.add("te"); f11_pronouns.add("tu"); f11_pronouns.add("vestrum"); f11_pronouns.add("vobis"); f11_pronouns.add("vos"); // Alle zu Pronomen f11_adjectives = new HashSet<String>(); f11_adjectives.add("aliqui"); f11_adjectives.add("aliquod"); f11_adjectives.add("ambo"); f11_adjectives.add("amborum"); f11_adjectives.add("mei"); f11_adjectives.add("meum"); f11_adjectives.add("nulla"); f11_adjectives.add("nullam"); f11_adjectives.add("nullas"); f11_adjectives.add("nulli"); f11_adjectives.add("nullius"); f11_adjectives.add("nullo"); f11_adjectives.add("nullum"); f11_adjectives.add("nullus"); f11_adjectives.add("sola"); f11_adjectives.add("solae"); f11_adjectives.add("solam"); f11_adjectives.add("solius"); f11_adjectives.add("solo"); f11_adjectives.add("solus"); f11_adjectives.add("tota"); f11_adjectives.add("totam"); f11_adjectives.add("toti"); f11_adjectives.add("totius"); f11_adjectives.add("toto"); f11_adjectives.add("totum"); f11_numbers = new HashSet<String>(); f11_numbers.add("duabus"); f11_numbers.add("duae"); f11_numbers.add("duas"); f11_numbers.add("duo"); f11_numbers.add("duobus"); f11_numbers.add("duorum"); f11_numbers.add("duos"); f11_numbers.add("una"); f11_numbers.add("unam"); f11_numbers.add("uni"); f11_numbers.add("unius"); f11_numbers.add("uno"); f11_numbers.add("unum"); f11_numbers.add("unus"); f11_numbers.add("una"); f11_adverbs = new HashSet<String>(); f11_adverbs.add("aliter"); f11_adverbs.add("hinc"); o4_conj = new HashSet<String>(); o4_conj.add("ac"); o4_conj.add("aut"); o4_conj.add("autem"); o4_conj.add("enim"); o4_conj.add("et"); o4_conj.add("etiam"); o4_conj.add("igitur"); o4_conj.add("immo"); o4_conj.add("nam"); o4_conj.add("nec"); o4_conj.add("neque"); o4_conj.add("quasi"); o4_conj.add("quidem"); o4_conj.add("quod"); o4_conj.add("seu"); o4_conj.add("sic"); o4_conj.add("sicut"); o4_conj.add("sive"); o4_conj.add("tam"); o4_conj.add("tamquam"); o4_conj.add("ut"); o4_conj.add("utrum"); o4_conj.add("vel"); o4_conj.add("quando"); o4_conj.add("vero"); o4_adverb = new HashSet<String>(); o4_adverb.add("adhuc"); //o4_adverb.add("alias"); o4_adverb.add("deinde"); o4_adverb.add("dumtaxat"); o4_adverb.add("ergo"); o4_adverb.add("idcirco"); o4_adverb.add("ideo"); o4_adverb.add("inde"); o4_adverb.add("ita"); o4_adverb.add("item"); o4_adverb.add("nihilominus"); o4_adverb.add("postea"); o4_adverb.add("praeterea"); o4_adverb.add("quomodo"); o4_adverb.add("scilicet"); o4_adverb.add("simul"); o4_adverb.add("statim"); o4_adverb.add("tamen"); o4_adverb.add("tum"); o4_adverb.add("propterea"); o4_adverb.add("tunc"); o4_adverb.add("unde"); o4_adverb.add("usque"); o4_adverb.add("utpote"); o4_conj_deprels = new HashSet<String>(); o4_conj_deprels.add("auxc"); o4_conj_deprels.add("coord"); o4_conj_deprels.add("xseg"); o4_conj_deprels.add("apos"); o4_conj_deprels.add("auxy"); o4_adverb_deprels = new HashSet<String>(); o4_adverb_deprels.add("auxz"); o4_adverb_deprels.add("adv"); o4_adverb_deprels.add("atr"); o4_adverb_deprels.add("pred"); o4_adverb_deprels.add("sb"); o4_adverb_deprels.add("exd"); o4_adverb_deprels.add("obj"); o4_adverb_deprels.add("pnom"); o4_conj.add("nisi"); o4_adverb.add("quam"); o4_adverb.add("quamvis"); o4_adverb.add("tanto"); pronoun_lemmas = new HashSet<String>(); pronoun_lemmas.add("nullus"); pronoun_lemmas.add("ullus"); pronoun_lemmas.add("totus"); pronoun_lemmas.add("unus"); pronoun_lemmas.add("uter"); pronoun_lemmas.add("neuter"); pronoun_lemmas.add("alter"); pronoun_lemmas.add("alius"); pronoun_lemmas.add("solus"); } Set<Pos> getCandidates(String form, String lemma, String simple_deprel, LdtMorphTag ldt_tag, ItMorphTag it_tag) { int index = simple_deprel.indexOf('_'); if (index >= 0) { simple_deprel = simple_deprel.substring(0, index); } Set<Pos> candidates = new HashSet<Pos>(); switch (it_tag.flexional_type_) { case One: if (form.equals("artificis") && lemma.equals("artifex")) { return Collections.singleton(Pos.n); } if (pronoun_lemmas.contains(lemma)) { return Collections.singleton(Pos.p); } switch (it_tag.flexional_category_) { case F: if (it_tag.nominals_degree_ == NominalsDegree.One) { if (f11_nouns.contains(form)) { return Collections.singleton(Pos.n); } if (f11_pronouns.contains(form)) { return Collections.singleton(Pos.p); } if (f11_adjectives.contains(form)) { return Collections.singleton(Pos.a); } if (f11_numbers.contains(form)) { return Collections.singleton(Pos.m); } if (f11_adverbs.contains(form)) { ldt_tag.reset(); return Collections.singleton(Pos.d); } } break; case G: if (form.equals("esse")) { return Collections.singleton(Pos.n); } break; default: break; } if (it_tag.participials_degree_ == ParticipialsDegree.Undef) { if (it_tag.case_number == CaseNumber.G) { ldt_tag.reset(); return Collections.singleton(Pos.d); } //candidates.add(Pos.n); //candidates.add(Pos.a); //return candidates; } if (nouns.contains(form)) { return Collections.singleton(Pos.n); } if (pronouns.contains(form)) { return Collections.singleton(Pos.p); } break; case Four: if (it_tag.flexional_category_ == FlexionalCategory.O) { if (o4_conj.contains(form)) { return Collections.singleton(Pos.c); } if (o4_adverb.contains(form)) { return Collections.singleton(Pos.d); } if (o4_conj_deprels.contains(simple_deprel)) { return Collections.singleton(Pos.c); } if (o4_adverb_deprels.contains(simple_deprel)) { return Collections.singleton(Pos.d); } } //if (it_tag.flexional_category_ == FlexionalCategory.S) { // return Collections.singleton(Pos.r); //} break; default: break; } return candidates; } }