/* WPOS.java - corresponds to a Part of Speech level of Wiktionary word.
*
* Copyright (c) 2008 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com>
* Distributed under GNU General Public License.
*/
package wikokit.base.wikt.word;
import wikokit.base.wikipedia.language.LanguageType;
import wikokit.base.wikt.util.POSText;
import wikokit.base.wikt.util.LangText;
import wikokit.base.wikt.constant.POS;
import wikokit.base.wikt.constant.Relation;
import wikokit.base.wikt.multi.ru.WPOSRu;
import wikokit.base.wikt.multi.en.WEtymologyEn;
import wikokit.base.wikt.multi.en.WPOSEn;
import java.util.Map;
/** Part of Speech may be a misnomer... It is the key descriptor for the
* lexical function of the term in question (such as 'noun', 'verb', etc).
* The definitions themselves come within its scope. In addition to the
* traditional “parts of speech” it has come to include entities that are less
* than words, such as initialisms and suffixes, and items that are
* more than words, such as idiomatic expressions, phrases and proverbs.
* This heading is nestable. It is most frequently in a level three heading,
* but may have a lower level for terms that have multiple etymologies or
* pronunciations.
*
* WPOS consists of <PRE>
* # Meaning (Definition (preceded by "#", which causes automatic numbering) + Quotations).
* # Semantic relations (synonyms, antonyms, etc.) only for this (first, second...) meaning
* # Translation </PRE>
*
* See http://en.wiktionary.org/wiki/Wiktionary:Entry_layout_explained
* @see wikt.sql.TLangPOS
*/
public class WPOS {

    /** Part of speech. */
    private POS pos_type;

    // Pronunciations
    // todo ...

    /** (1) Meaning consists of Definitions + Quotations. */
    private WMeaning[] meaning;

    /** (2) Semantic relations: synonymy, antonymy, etc.
     * The map from semantic relation (e.g. synonymy) to array of WRelation
     * (one WRelation contains a list of synonyms for one meaning).
     */
    private Map<Relation, WRelation[]> relation;

    /** (3) Translation */
    private WTranslation[] translation;

    // Text which does not belong to any POS texts, e.g. Bibliography, Links...
    //public StringBuffer remain_text;

    /** Shared empty array returned by {@link #parse} when no POS sections are found. */
    private final static WPOS[] NULL_WPOS_ARRAY = new WPOS[0];

    /** Gets part of speech. */
    public POS getPOS() {
        return pos_type;
    }

    /** Gets all senses.
     *
     * @return the internal array of meanings (not a copy); null after {@link #free()}
     */
    public WMeaning[] getAllMeanings() {
        return meaning;
    }

    /** Gets all relations.
     *
     * @return the internal map of relations (not a copy); null after {@link #free()}
     */
    public Map<Relation, WRelation[]> getAllRelations() {
        return relation;
    }

    /** Gets all translations.
     *
     * @return the internal array of translations (not a copy); null after {@link #free()}
     */
    public WTranslation[] getAllTranslation() {
        return translation;
    }

    /** Frees memory recursively: calls free() on every non-null meaning,
     * relation and translation, then drops the references to the containers.
     * Null containers and null elements are skipped, so the method is safe
     * to call on partially filled objects and more than once.
     */
    public void free ()
    {
        if (null != meaning) {
            for (int i = 0; i < meaning.length; i++) {
                // guard against null slots, consistent with the relation branch below
                if (null != meaning[i]) {
                    meaning[i].free();
                    meaning[i] = null;
                }
            }
            meaning = null;
        }

        if (null != relation) {
            for (WRelation[] wr : relation.values()) {
                if (null == wr) {
                    continue; // a relation type mapped to no array: nothing to free
                }
                for (WRelation r : wr) {
                    if (null != r) {
                        r.free();
                    }
                }
            }
            relation.clear();
            relation = null;
        }

        if (null != translation) {
            for (int i = 0; i < translation.length; i++) {
                if (null != translation[i]) {
                    translation[i].free();
                    translation[i] = null;
                }
            }
            translation = null;
        }
    }

    /** Parses text of one language section: splits it into part of speech
     * sections and, for each of them, creates a WPOS filled with meanings,
     * semantic relations and translations.
     *
     * @param wikt_lang     language of Wiktionary
     * @param page_title    word which is described in this article
     * @param lang_section  text of the language section together with its language
     * @return one WPOS per POS section, in the order of the sections;
     *         an empty array if there are no POS sections
     * @throws NullPointerException if wikt_lang is null or is not supported
     *         by {@link #splitToPOSSections}
     */
    public static WPOS[] parse (
                    LanguageType wikt_lang,
                    String page_title,
                    LangText lang_section)
    {
        // == Level II. Part of speech ==
        POSText[] pt = WPOS.splitToPOSSections(wikt_lang, page_title, lang_section);

        // a missing array of sections is treated the same as an empty one
        if (null == pt || 0 == pt.length) {
            return NULL_WPOS_ARRAY;
        }

        WPOS[] wpos = new WPOS[pt.length]; // result
        for (int j = 0; j < pt.length; j++) {
            WPOS w = new WPOS();
            w.pos_type = pt[j].getPOSType();

            // === III. Meanings ==
            w.meaning = WMeaning.parse(wikt_lang, page_title, lang_section.getLanguage(), pt[j]);

            // === III. Semantic relations ==
            w.relation = WRelation.parse(wikt_lang, page_title, lang_section.getLanguage(), pt[j]);

            // === III. Translations ==
            w.translation = WTranslation.parse(wikt_lang, page_title, lang_section.getLanguage(), pt[j]);

            wpos[j] = w;
        }
        return wpos;
    }

    /** Splits text to fragments related to different parts of speech (POS).
     * The splitting is delegated to the parser of the given Wiktionary
     * edition: WPOSRu for Russian; WEtymologyEn followed by WPOSEn for English
     * (the text is first split into etymology sections).
     *
     * @param wikt_lang       language of Wiktionary, only ru and en are supported
     * @param page_title      word which is described in this article text
     * @param source_langtext text of one language section to be split
     * @return POS sections as returned by the language-specific parser
     * @throws NullPointerException if wikt_lang is null, or if it is neither
     *         ru nor en (the exception type is kept for backward compatibility)
     */
    public static POSText[] splitToPOSSections (
                    LanguageType wikt_lang,
                    String page_title,
                    LangText source_langtext)
    {
        if (null == wikt_lang) {
            throw new NullPointerException("Null LanguageType");
        }

        if (wikt_lang == LanguageType.ru) {
            return WPOSRu.splitToPOSSections(page_title, source_langtext);
        }

        if (wikt_lang == LanguageType.en) {
            LangText[] etymology_sections = WEtymologyEn.splitToEtymologySections(page_title, source_langtext);
            return WPOSEn.splitToPOSSections(page_title, etymology_sections);
        }

        // todo: other Wiktionary editions (e.g. "simple")
        // NullPointerException is kept so that existing callers which catch it
        // keep working; the message now tells the real reason.
        throw new NullPointerException("Unsupported LanguageType: " + wikt_lang);
    }
}