/* POSTemplateEn.java - English names of parts of speech.
*
* Copyright (c) 2008-2011 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com>
* Distributed under EPL/LGPL/GPL/AL/BSD multi-license.
*/
package wikokit.base.wikt.multi.en;
import wikokit.base.wikt.constant.POSType;
import wikokit.base.wikt.constant.POS;
import java.util.Map;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import wikokit.base.wikipedia.util.StringUtil;
/** English names of parts of speech.
*
* @see http://en.wiktionary.org/wiki/Wiktionary:Entry_layout_explained/POS_headers
*/
public class POSTemplateEn extends POSType {
/*private POSTypeEn(String name_in_text, String name, POS type) {
super(name_in_text, name, type);
}*/
/** POS name encountered in the Wiktionary, e.g.: {{acronym}} or Acronym,
* since there are ==={{acronym}}=== and ===Acronym===
*/
private final String name_in_text;
/** POS */
private final POS type;
private static Map<String, POS> name2type = new HashMap<String, POS>();
/** E.g. verb -> "verb", "verb form", "verb prefix". It is used in POS statistics. */
private static Map<POS, Set<String>> type2name_in_text = new HashMap<POS, Set<String>>();
private final static String[] NULL_STRING_ARRAY = new String[0];
/** Initialization for POSTypeEn, POSTypeRu, etc. */
private POSTemplateEn(String name_in_text, POS type) {
//super(name_in_text, type);
this.name_in_text = name_in_text;
this.type = type; // english.english;
name2type.put(name_in_text, type); // english.english);
{ // store (POS, +=name_in_text) -> type2name_in_text
Set<String> templates = type2name_in_text.get(type);
if(null == templates)
templates = new HashSet<String>();
templates.add(name_in_text);
type2name_in_text.put(type, templates);
}
}
public String getName() { return type.toString(); }
/** Checks weather exists the language code 'code'. */
public static boolean has(String code) {
return name2type.containsKey(code);
}
/** Gets part of speech by its abbreviation */
public static POS get(String code) {
return name2type.get(code);
}
/** Gets (token separated) abbreviations or templates used in order
* to recognize the "pos" part of speech.
*/
public static String getTemplates(String token, POS pos) {
Set<String> templates = type2name_in_text.get(pos);
return StringUtil.join(", ", (String[])templates.toArray(NULL_STRING_ARRAY));
}
// The classical parts of speech are:
public static final POSType noun = new POSTemplateEn("noun", POS.noun);
public static final POSType verb = new POSTemplateEn("verb", POS.verb);
public static final POSType verb_form = new POSTemplateEn("verb form", POS.verb);
public static final POSType verb_prefix = new POSTemplateEn("verb prefix", POS.verb);
public static final POSType adverb = new POSTemplateEn("adverb", POS.adverb);
public static final POSType adjective = new POSTemplateEn("adjective", POS.adjective);
public static final POSType adjectival_noun = new POSTemplateEn("adjectival noun", POS.adjective);
public static final POSType quasi_adjective = new POSTemplateEn("quasi-adjective", POS.adjective);
public static final POSType pronoun = new POSTemplateEn("pronoun", POS.pronoun);
public static final POSType conjunction = new POSTemplateEn("conjunction", POS.conjunction);
public static final POSType interjection = new POSTemplateEn("interjection", POS.interjection);
public static final POSType preposition = new POSTemplateEn("preposition", POS.preposition);
public static final POSType prepositional_phrase = new POSTemplateEn("prepositional phrase", POS.prepositional_phrase);
// Additional commonly used grammatical headers are:
public static final POSType proper_noun = new POSTemplateEn("proper noun", POS.proper_noun);
public static final POSType article = new POSTemplateEn("article", POS.article);
public static final POSType prefix = new POSTemplateEn("prefix", POS.prefix);
public static final POSType suffix = new POSTemplateEn("suffix", POS.suffix);
// may be the same: Idiom, Phrase, Noun, Verb
public static final POSType phrase = new POSTemplateEn("phrase", POS.phrase);
// see e.g. "your mileage may vary" ==Idiom==
// "rain cats and dogs" ===Verb=== # {{idiom}} // idiomatic is ContextLabel
// "grain of salt" ===Noun=== # {{idiom}} // idiomatic is ContextLabel
public static final POSType idiom = new POSTemplateEn("idiom", POS.idiom);
// other descriptors that identify the usage of the entry, but which are not (strictly speaking) parts of speech:
public static final POSType acronym_template= new POSTemplateEn("{{acronym}}", POS.acronym); // ==={{acronym}}===
public static final POSType acronym = new POSTemplateEn("acronym", POS.acronym); // ===Acronym===
public static final POSType abbreviation_template = new POSTemplateEn("{{abbreviation}}", POS.abbreviation);// ==={{abbreviation}}===
public static final POSType abbreviation = new POSTemplateEn("abbreviation", POS.abbreviation);
public static final POSType initialism_template = new POSTemplateEn("{{initialism}}", POS.initialism);
public static final POSType initialism = new POSTemplateEn("initialism", POS.initialism);
public static final POSType contraction = new POSTemplateEn("contraction", POS.contraction);
public static final POSType symbol = new POSTemplateEn("symbol", POS.symbol);
public static final POSType letter = new POSTemplateEn("letter", POS.letter);
// debated POS level 3 headers
public static final POSType number = new POSTemplateEn("numeral", POS.numeral);
public static final POSType numeral = new POSTemplateEn("number", POS.numeral);
public static final POSType cardinal_number = new POSTemplateEn("cardinal number", POS.numeral);
public static final POSType ordinal_number = new POSTemplateEn("ordinal number", POS.numeral);
public static final POSType cardinal_numeral = new POSTemplateEn("cardinal numeral", POS.numeral);
public static final POSType ordinal_numeral = new POSTemplateEn("ordinal numeral", POS.numeral);
// other headers in use
public static final POSType particle = new POSTemplateEn("particle", POS.particle); // (language) particles, CJKV languages, and some others; see tok, ne.
public static final POSType participle = new POSTemplateEn("participle",POS.participle);
public static final POSType determiner = new POSTemplateEn("determiner",POS.determiner);
public static final POSType infix = new POSTemplateEn("infix", POS.infix);
public static final POSType interfix = new POSTemplateEn("interfix",POS.interfix);
public static final POSType affix = new POSTemplateEn("affix", POS.affix);
public static final POSType circumfix = new POSTemplateEn("circumfix", POS.circumfix);
public static final POSType counter = new POSTemplateEn("counter", POS.counter);
public static final POSType kanji = new POSTemplateEn("kanji", POS.kanji);
public static final POSType kanji_reading = new POSTemplateEn("kanji reading", POS.kanji_reading);
public static final POSType hanja_reading = new POSTemplateEn("hanja reading", POS.hanja_reading);
public static final POSType hiragana_letter = new POSTemplateEn("hiragana letter", POS.hiragana_letter);
public static final POSType katakana_letter = new POSTemplateEn("katakana letter", POS.katakana_letter);
public static final POSType pinyin = new POSTemplateEn("pinyin", POS.pinyin);
public static final POSType han_character = new POSTemplateEn("han character", POS.han_character);
public static final POSType hanzi = new POSTemplateEn("hanzi", POS.hanzi);
public static final POSType hanja = new POSTemplateEn("hanja", POS.hanja);
public static final POSType proverb = new POSTemplateEn("proverb", POS.proverb);
public static final POSType expression = new POSTemplateEn("expression", POS.expression);
public static final POSType possessive_adjective = new POSTemplateEn("possessive_adjective", POS.possessive_adjective);
public static final POSType postposition = new POSTemplateEn("postposition", POS.postposition);
public static final POSType gerund = new POSTemplateEn("gerund", POS.gerund);
public static final POSType pronominal_adverb = new POSTemplateEn("pronominal adverb", POS.pronominal_adverb);
public static final POSType adnominal = new POSTemplateEn("adnominal", POS.adnominal);
public static final POSType root = new POSTemplateEn("root", POS.root);
public static final POSType pinyin_syllable = new POSTemplateEn("pinyin syllable", POS.pinyin_syllable);
public static final POSType syllable = new POSTemplateEn("syllable", POS.syllable);
public static final POSType hiragana_character = new POSTemplateEn("hiragana character", POS.hiragana_character);
public static final POSType katakana_character = new POSTemplateEn("katakana character", POS.katakana_character);
public static final POSType jyutping_syllable = new POSTemplateEn("jyutping syllable", POS.jyutping_syllable);
public static final POSType gismu = new POSTemplateEn("gismu", POS.gismu);
public static final POSType lujvo = new POSTemplateEn("lujvo", POS.lujvo);
public static final POSType classifier = new POSTemplateEn("classifier", POS.classifier);
public static final POSType predicative = new POSTemplateEn("predicative", POS.predicative);
public static final POSType measure_word = new POSTemplateEn("measure word", POS.measure_word);
public static final POSType correlative = new POSTemplateEn("correlative", POS.correlative);
public static final POSType preverb = new POSTemplateEn("preverb", POS.preverb);
public static final POSType prenoun = new POSTemplateEn("prenoun", POS.prenoun);
public static final POSType noun_stem = new POSTemplateEn("noun stem", POS.noun_stem);
public static final POSType noun_class = new POSTemplateEn("noun class", POS.noun_class);
public static final POSType combined_kana_character = new POSTemplateEn("combined-kana character", POS.combined_kana_character);
// public static final POSType = new POSTemplateEn("", POS.);
// Non-standard, deprecated headers
}