/* POSTemplateRu.java - Names of POS templates in Russian Wiktionary.
*
* Copyright (c) 2008-2011 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com>
* Distributed under EPL/LGPL/GPL/AL/BSD multi-license.
*/
package wikokit.base.wikt.multi.ru;
import wikokit.base.wikt.constant.POSType;
import wikokit.base.wikt.constant.POS;
import java.util.Map;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import wikokit.base.wikipedia.util.StringUtil;
/** Names of POS templates in Russian Wiktionary.
*
* See http://ru.wiktionary.org/wiki/%D0%92%D0%B8%D0%BA%D0%B8%D1%81%D0%BB%D0%BE%D0%B2%D0%B0%D1%80%D1%8C:%D0%A7%D0%B0%D1%81%D1%82%D0%B8_%D1%80%D0%B5%D1%87%D0%B8
* http://ru.wiktionary.org/wiki/Викисловарь:Части речи
*
* Категория:Шаблоны словоизменений
*/
public class POSTemplateRu extends POSType {

    /** POS name as encountered in the Wiktionary text, e.g.: {{acronym}} or Acronym,
     * since there are both ==={{acronym}}=== and ===Acronym=== headers.
     */
    private final String name_in_text;

    // TODO: keep the POS name in Russian too (e.g. "Акроним" for "acronym"),
    //       i.e. a map from POS to the Russian POS name.

    /** Part of speech which this template name stands for. */
    private final POS type;

    // NOTE: both registries below must be declared BEFORE the public constants
    // of this class, because every constant registers itself in them from the
    // constructor during static initialization.

    /** Template name (as found in the text) -> part of speech. */
    private static final Map<String, POS> name_in_text2type = new HashMap<String, POS>();

    /** Part of speech -> all template names registered for it,
     * e.g. noun -> "сущ", "падежи", "фам". It is used in POS statistics.
     */
    private static final Map<POS, Set<String>> type2name_in_text = new HashMap<POS, Set<String>>();

    /** Typed empty array passed to toArray() in order to get a String[] result. */
    private final static String[] NULL_STRING_ARRAY = new String[0];

    /** Separator used by getTemplates() when the caller passes null. */
    private static final String DEFAULT_SEPARATOR = ", ";

    /** Creates a template name and registers it in both lookup maps
     * (name -> POS and POS -> set of names).
     *
     * @param name_in_text template name or abbreviation as it is written in the text
     * @param type         part of speech which this name denotes
     */
    private POSTemplateRu(String name_in_text, POS type) {
        this.name_in_text = name_in_text;
        this.type = type;

        name_in_text2type.put(name_in_text, type);

        // store (POS, += name_in_text) -> type2name_in_text
        Set<String> templates = type2name_in_text.get(type);
        if (null == templates) {
            templates = new HashSet<String>();
            type2name_in_text.put(type, templates);
        }
        templates.add(name_in_text);
    }

    /** Gets the string form (toString()) of the part of speech of this template. */
    public String getName() { return type.toString(); }

    /** Checks whether the part of speech with the abbreviation 'code' exists.
     *
     * @param code template name or abbreviation, compared exactly (as is)
     * @return true if 'code' was registered by one of the constants of this class
     */
    public static boolean has(String code) {
        return name_in_text2type.containsKey(code);
    }

    /** Checks whether the given text 'code' contains a known part of speech tag.
     *
     * Registered names overlap, e.g. "part" contains "art", "particle"
     * contains "article", "interj1" contains "interj". So the longest
     * registered name found in 'code' wins; this makes the answer independent
     * of the (unspecified) iteration order of the hash map. If several names
     * of the same length match, the lexicographically smallest one is taken,
     * only in order to keep the result deterministic.
     *
     * @param code text to be searched for template names, may be null
     * @return part of speech of the best matching name, or POS.unknown
     *         if 'code' is null or contains no registered name
     */
    public static POS isPOSIn(String code) {
        if (null == code) {
            return POS.unknown;
        }

        String best_name = null;
        POS best_type = POS.unknown;
        for (Map.Entry<String, POS> entry : name_in_text2type.entrySet()) {
            String name = entry.getKey();
            if (!code.contains(name)) {
                continue;
            }
            boolean better = null == best_name
                    || name.length() > best_name.length()
                    || (name.length() == best_name.length() && name.compareTo(best_name) < 0);
            if (better) {
                best_name = name;
                best_type = entry.getValue();
            }
        }
        return best_type;
    }

    /** Gets part of speech by its abbreviation or template name (exact match).
     *
     * @param code template name or abbreviation
     * @return part of speech, or null if 'code' is not registered
     */
    public static POS get(String code) {
        return name_in_text2type.get(code);
    }

    /** Gets (token separated) abbreviations or templates used (by parser)
     * in order to recognize the "pos" part of speech.
     *
     * The order of names in the result is not specified, since the names
     * are stored in a hash set.
     *
     * @param token separator placed between the names; if it is null
     *              then ", " is used
     * @param pos   part of speech
     * @return joined template names, or empty string if there are no names
     *         registered for this part of speech
     */
    public static String getTemplates(String token, POS pos) {
        Set<String> templates = type2name_in_text.get(pos);
        if (null == templates) {
            return "";
        }
        String separator = (null == token) ? DEFAULT_SEPARATOR : token;
        return StringUtil.join(separator, templates.toArray(NULL_STRING_ARRAY));
    }

    // The classical parts of speech are:

    // Header in the entry: ===Морфологические и синтаксические свойства===
    // (i.e. "Morphological and syntactic properties"), followed by templates like
    // {{СущМужНеодуш1c(1)
    // {{СущЖенНеодуш8a
    // or by the plain text: Существительное (noun), ...
    public static final POSType noun = new POSTemplateRu("сущ", POS.noun);
    public static final POSType noun_m_inanimate = new POSTemplateRu("сущмужнеодуш", POS.noun); // СущМужНеодуш-пол - noun
    public static final POSType noun_old = new POSTemplateRu("падежи", POS.noun); // noun ("существительное")
    public static final POSType noun_surname = new POSTemplateRu("фам", POS.noun); // Фам - surname (noun)

    // ===Морфологические и синтаксические свойства===
    // {{парадигма-рус
    // |шаблон=Гл11b/c
    //
    // {{Гл1a
    public static final POSType verb = new POSTemplateRu("гл", POS.verb); // verb ("глагол")
    public static final POSType verb_old_ru = new POSTemplateRu("глагол", POS.verb);

    // {{adv-ru|
    // Plain text: "Наречие, неизменяемое." (adverb, invariable)
    public static final POSType adverb_template = new POSTemplateRu("adv", POS.adverb); // adverb ("наречие"): adv ru, adv-ru
    public static final POSType adverb_word = new POSTemplateRu("наречие", POS.adverb); // adverb ("наречие")
    public static final POSType adverb_word2 = new POSTemplateRu("нар", POS.adverb); // adverb ("наречие")

    // {{прил en|round|слоги=round}}
    public static final POSType adjective = new POSTemplateRu("прил", POS.adjective); // adjective ("прилагательное")
    public static final POSType adjective_old_en = new POSTemplateRu("adjective", POS.adjective);
    // прил0 - used only once, skip
    // прил-сравн (comparative degree) - works without a separate constant,
    // since "прил-сравн" starts from "прил-"

    // {{мест ru 6*b
    public static final POSType pronoun = new POSTemplateRu("мест", POS.pronoun);
    public static final POSType pronoun2 = new POSTemplateRu("местоимения", POS.pronoun);
    public static final POSType pronoun_addon = new POSTemplateRu("мс", POS.pronoun);

    public static final POSType conjunction = new POSTemplateRu("conj", POS.conjunction); // conjunction ("союз")
    public static final POSType conjunction2 = new POSTemplateRu("союз", POS.conjunction);

    public static final POSType interjection = new POSTemplateRu("interj", POS.interjection); // interjection ("междометие")
    public static final POSType interjection2 = new POSTemplateRu("межд", POS.interjection);

    public static final POSType preposition = new POSTemplateRu("prep", POS.preposition); // preposition ("предлог")
    public static final POSType postposition = new POSTemplateRu("послелог", POS.postposition);

    // Additional commonly used grammatical headers are:
    // proper_noun ?
    public static final POSType article = new POSTemplateRu("art", POS.article); // article ("артикль")
    public static final POSType article2 = new POSTemplateRu("article", POS.article);

    public static final POSType prefix = new POSTemplateRu("prefix", POS.prefix); // prefix ("приставка")
    public static final POSType suffix = new POSTemplateRu("suffix", POS.suffix); // suffix ("суффикс")

    // phrase: there is a special function: WPOSRu.isPhrasePOS()
    // idiom - in phrase
    // prepositional_phrase - may be in phrase

    // debated POS level 3 headers
    public static final POSType numeral = new POSTemplateRu("числ", POS.numeral); // numeral ("числительное")

    // other descriptors that identify the usage of the entry, but which are not (strictly speaking) parts of speech:
    // acronym ?
    public static final POSType abbreviation = new POSTemplateRu("abbrev", POS.abbreviation); // abbreviation ("аббревиатура")

    // other headers in use
    public static final POSType particle1 = new POSTemplateRu("part", POS.particle); // particle ("частица"): part ru, part-ru
    public static final POSType particle3 = new POSTemplateRu("particle", POS.particle);

    public static final POSType participle = new POSTemplateRu("прич", POS.participle); // participle ("причастие")

    public static final POSType predicative = new POSTemplateRu("predic", POS.predicative); // nominal part of a compound predicate, predicative ("предикатив")

    // only in Russian Wiktionary (yet)
    public static final POSType verb_interjection = new POSTemplateRu("interj1", POS.verb_interjection); // interj1 - verb-interjection word ("глагольно-междометное слово")
    public static final POSType parenthesis = new POSTemplateRu("intro", POS.parenthesis); // parenthetical (introductory) word ("вводное слово")
    public static final POSType prefix_of_compound = new POSTemplateRu("init", POS.prefix_of_compound); // first part of compound words ("первая часть сложных слов")
    // ! "init" in ruwikt (first part of compound words) <> "initialism" in enwikt
}