// Copyright 2013 Thomas Müller // This file is part of MarMoT, which is licensed under GPLv3. package marmot.morph.mapper.czech; import marmot.morph.mapper.MorphTag; public class PdtMorphTag implements MorphTag { enum Pos { a, // Adjective c, // Numeral d, // Adverb i, // Interjection j, // Conjunction n, // Noun p, // Pronoun v, // Verb r, // Preposition t, // Particle z, //Punctuation (also used for the Sentence Boundary token) x, _, }; enum Type { Dash, // Sentence boundary Z - punctuation Percent, // Author's signature, e.g. haš-99_:B_;S N - noun Asterisk, // Word krát (lit.: times) C - numeral Comma, // Conjunction subordinate (incl. aby, kdyby in all forms) J - // conjuction Bracket, // Numeral, written using Roman numerals (XIV) C - numeral Colon, // Punctuation (except for the virtual sentence boundary word // ###, which uses the the section called // "2 - Detailed part of speech" #) Z - punctuation Equals, // Number written using digits C - numeral Questionmark, // Numeral kolik (lit. how many/how much) C - numeral At, // Unrecognized word form X - unknown Zircumflex, // ^ Conjunction (connecting main clauses, not subordinate) // J - conjunction One, // Two, // Three, // Four, // Relative/interrogative pronoun with adjectival declension of // both types (soft and hard) (jaký, který, čí, ..., lit. what, // which, whose, ...) P - pronoun Five, // The pronoun he in forms requested after any preposition (with // prefix n-: něj, něho, ..., lit. him in various cases) P - // pronoun Six, // Reflexive pronoun se in long forms (sebe, sobě, sebou, lit. // myself / yourself / herself / himself in various cases; se is // personless) P - pronoun Seven, // Reflexive pronouns se (the section called "5 - Case" = 4), si // (the section called "5 - Case" = 3), plus the same two forms // with contracted -s: ses, sis (distinguished by the section // called "8 - Person" = 2; also number is singular only) This // should be done somehow more consistently, virtually any word // can have this contracted -s (cos, polívkus, ...) // P,// - pronoun Eight, // Possessive reflexive pronoun svůj (lit. my/your/her/his when // the possessor is the subject of the sentence) P - pronoun Nine, // Relative pronoun jenž, již, ... after a preposition (n-: něhož, // niž, ..., lit. who) P - pronoun A, // Adjective, general A - adjective B, // Verb, present or future form V - verb C, // Adjective, nominal (short, participial) form rád, schopen, ... A - // adjective D, // Pronoun, demonstrative (ten, onen, ..., lit. this, that, that ... // over there, ... ) P - pronoun E, // Relative pronoun což (corresponding to English which in // subordinate clauses referring to a part of the preceding text) P // - pronoun F, // Preposition, part of; never appears isolated, always in a phrase // (nehledě (na), vzhledem (k), ..., lit. regardless, because of) R // - preposition G, // Adjective derived from present transgressive form of a verb A - // adjective H, // Personal pronoun, clitical (short) form (mě, mi, ti, mu, ...); // these forms are used in the second position in a clause (lit. me, // you, her, him), even though some of them (mě) might be regularly // used anywhere as well P - pronoun I, // Interjections I - interjection J, // Relative pronoun jenž, již, ... not after a preposition (lit. who, // whom) P - pronoun K, // Relative/interrogative pronoun kdo (lit. who), incl. forms with // affixes -ž and -s (affixes are distinguished by the category // Table 2.16, "VAR" (for -ž) and the section called "8 - Person" // (for -s)) P - pronoun L, // Pronoun, indefinite všechnen, sám (lit. all, alone) P - pronoun M, // Adjective derived from verbal past transgressive form A - // adjective N, // Noun (general) N - noun O, // Pronoun svůj, nesvůj, tentam alone (lit. own self, not-in-mood, // gone) P - pronoun P, // Personal pronoun já, ty, on (lit. I, you, he ) (incl. forms with // the enclitic -s, e.g. tys, lit. you're); gender position is used // for third person to distinguish on/ona/ono (lit. he/she/it), and // number for all three persons P - pronoun Q, // Pronoun relative/interrogative co, copak, cožpak (lit. what, // isn't-it-true-that) P - pronoun R, // Preposition (general, without vocalization) R - preposition S, // Pronoun possessive můj, tvůj, jeho (lit. my, your, his); gender // position used for third person to distinguish jeho, její, jeho // (lit. his, her, its), and number for all three pronouns P - // pronoun T, // Particle T - particle U, // Adjective possessive (with the masculine ending -ův as well as // feminine -in) A - adjective V, // Preposition (with vocalization -e or -u): (ve, pode, ku, ..., lit. // in, under, to) R - preposition W, // Pronoun negative (nic, nikdo, nijaký, žádný, ..., lit. nothing, // nobody, not-worth-mentioning, no/none) P - pronoun X, // (temporary) Word form recognized, but tag is missing in dictionary // due to delays in (asynchronous) dictionary creation Y, // Pronoun relative/interrogative co as an enclitic (after a // preposition) (oč, nač, zač, lit. about what, on/onto what, // after/for what) P - pronoun Z, // Pronoun indefinite (nějaký, některý, číkoli, cosi, ..., lit. some, // some, anybody's, something) P - pronoun a, // Numeral, indefinite (mnoho, málo, tolik, několik, kdovíkolik, ..., // lit. much/many, little/few, that much/many, some (number of), // who-knows-how-much/many) C - numeral b, // Adverb (without a possibility to form negation and degrees of // comparison, e.g. pozadu, naplocho, ..., lit. behind, flatly); // i.e. both the the section called "11 - Negation" as well as the // Table 2.13, "GRADE" attributes in the same tag are marked by - // (Not applicable) D - adverb c, // Conditional (of the verb být (lit. to be) only) (by, bych, bys, // bychom, byste, lit. would) V - verb d, // Numeral, generic with adjectival declension (dvojí, desaterý, ..., // lit. two-kinds/..., ten-...) C - numeral e, // Verb, transgressive present (endings -e/-ě, -íc, -íce) V - verb f, // Verb, infinitive V - verb g, // Adverb (forming negation (??? set to A/N) and degrees of // comparison Table 2.13, "GRADE" set to 1/2/3 // (comparative/superlative), e.g. velký, za\-jí\-ma\-vý, ..., lit. // big, interesting h, // Numeral, generic; only jedny and nejedny (lit. one-kind/sort-of, // not-only-one-kind/sort-of) C - numeral i, // Verb, imperative form V - verb j, // Numeral, generic greater than or equal to 4 used as a syntactic // noun (čtvero, desatero, ..., lit. four-kinds/sorts-of, ten-...) C // - numeral k, // Numeral, generic greater than or equal to 4 used as a syntactic // adjective, short form (čtvery, ..., lit. four-kinds/sorts-of) C - // numeral l, // Numeral, cardinal jeden, dva, tři, čtyři, půl, ... (lit. one, two, // three, four); also sto and tisíc (lit. hundred, thousand) if noun // declension is not used C - numeral m, // Verb, past transgressive; also archaic present transgressive of // perfective verbs (ex.: udělav, lit. (he-)having-done; arch. also // udělaje (Table 2.16, "VAR" = 4), lit. (he-)having-done) V - verb n, // Numeral, cardinal greater than or equal to 5 C - numeral o, // Numeral, multiplicative indefinite (-krát, lit. (times): // mnohokrát, tolikrát, ..., lit. many times, that many times) C - // numeral p, // Verb, past participle, active (including forms with the enclitic - // s, lit. 're (are)) V - verb q, // Verb, past participle, active, with the enclitic -ť, lit. // (perhaps) - could-you-imagine-that? or but-because- (both // archaic) V - verb r, // Numeral, ordinal (adjective declension without degrees of // comparison) C - numeral s, // Verb, past participle, passive (including forms with the enclitic // -s, lit. 're (are)) V - verb t, // Verb, present or future tense, with the enclitic -ť, lit. // (perhaps) -could-you-imagine-that? or but-because- (both archaic) // V - verb u, // Numeral, interrogative kolikrát, lit. how many times? C - numeral v, // Numeral, multiplicative, definite (-krát, lit. times: pětkrát, // ..., lit. five times) C - numeral w, // Numeral, indefinite, adjectival declension (nejeden, tolikátý, // ..., lit. not-only-one, so-many-times-repeated) C - numeral y, // Numeral, fraction ending at -ina; used as a noun (pětina, lit. // one-fifth) C - numeral z, // Numeral, interrogative kolikátý, lit. what (at-what-position- // place-in-a-sequence) C - numeral x, } enum Gender { f, // Feminine h, // {F, N} - Feminine or Neuter i, // Masculine inanimate m, // Masculine animate n, // Neuter q, // Feminine (with singular only) or Neuter (with plural only); used // only with participles and nominal forms of adjectives t, // Masculine inanimate or Feminine (plural only); used only with // participles and nominal forms of adjectives y, // {M, I} - Masculine (either animate or inanimate) z, // {M, I, N} - Not fenimine (i.e., Masculine animate/inanimate or // Neuter); only for (some) pronoun forms and certain numerals _, } enum Number { d, // Dual , e.g. nohama p, // Plural, e.g. nohami s, // Singular, e.g. noha w, // Singular for feminine gender, plural with neuter; can only appear // in participle or nominal adjective form with gender value Q _, } enum Case { nom, // Nominative, e.g. žena gen, // Genitive, e.g. ženy dat, // Dative, e.g. ženě acc, // Accusative, e.g. ženu voc, // Vocative, e.g. ženo loc, // Locative, e.g. ženě ins, // Instrumental, e.g. ženou _ } // enum PossGender { // f, // Feminine, e.g. matčin, její // m, // Masculine animate (adjectives only), e.g. otců // z, // {M, I, N} - Not feminine, e.g. jeho // } // // enum PossNumber { // p,// Plural, e.g. náš // s,// Singular, e.g. můj // } enum Person { fst,// 1st person, e.g. píšu, píšeme snd,// 2nd person, e.g. píšeš, píšete thd,// 3rd person, e.g. píše, píšou _, } enum Tense { f,// Future h,// {R, P} - Past or Present p,// Present r,// Past _, } enum Degree { pos, // Positive, e.g. velký comp, // Comparative, e.g. větší sup,// Superlative, e.g. největší _, } enum Negation { a,// Affirmative (not negated), e.g. možný n,// Negated, e.g. nemožný _, } enum Voice { a,// Active, e.g. píšící p,// Passive, e.g. psaný _, } public Pos pos_; public Tense tense_; public Person person_; public Number number_; public Type type_; public Gender gender_; public Case case_; public Degree degree_; public Negation negation_; public Voice voice_; public PdtMorphTag() { reset(); } void reset() { pos_ = Pos._; type_ = Type.X; tense_ = Tense._; person_ = Person._; number_ = Number._; gender_ = Gender._; case_ = Case._; degree_ = Degree._; negation_ = Negation._; voice_ = Voice._; } @Override public String toHumanMorphString() { return pos_.toString() + type_ + gender_ + number_ + case_ + person_ + tense_ + degree_ + negation_ + voice_; } @Override public String toPosString() { return pos_.toString(); } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((case_ == null) ? 0 : case_.hashCode()); result = prime * result + ((degree_ == null) ? 0 : degree_.hashCode()); result = prime * result + ((gender_ == null) ? 0 : gender_.hashCode()); result = prime * result + ((negation_ == null) ? 0 : negation_.hashCode()); result = prime * result + ((number_ == null) ? 0 : number_.hashCode()); result = prime * result + ((person_ == null) ? 0 : person_.hashCode()); result = prime * result + ((pos_ == null) ? 0 : pos_.hashCode()); result = prime * result + ((tense_ == null) ? 0 : tense_.hashCode()); result = prime * result + ((type_ == null) ? 0 : type_.hashCode()); result = prime * result + ((voice_ == null) ? 0 : voice_.hashCode()); return result; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; PdtMorphTag other = (PdtMorphTag) obj; if (case_ != other.case_) return false; if (degree_ != other.degree_) return false; if (gender_ != other.gender_) return false; if (negation_ != other.negation_) return false; if (number_ != other.number_) return false; if (person_ != other.person_) return false; if (pos_ != other.pos_) return false; if (tense_ != other.tense_) return false; if (type_ != other.type_) return false; if (voice_ != other.voice_) return false; return true; } }