/* Label.java - contexual information for definitions, or Synonyms,
* or Translations.
*
* Copyright (c) 2013 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com>
* Distributed under EPL/LGPL/GPL/AL/BSD multi-license.
*/
package wikokit.base.wikt.constant;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import wikokit.base.wikipedia.language.LanguageType;
import wikokit.base.wikt.multi.en.name.LabelEn;
import wikokit.base.wikt.multi.ru.name.LabelRu;
/** Contextual information for definitions, such as archaic, by analogy,
* chemistry, etc.
*
* This contextual information is located in the sections: semantic relations
* and translations in Russian Wiktionary.
*
* This class describes context labels of English Wiktionary. Context labels
* of other wiktionaries (e.g. Russian Wiktionary) are described in
* LabelRu, German Wikt in LabelDe (todo), French Wikt in LabelFr (todo).
*
* @see (Scheme of Label abstract classes hierarchy - Wiktionary parser) wikokit\wikt_parser\doc\screenshots\Label_abstract_classes_hierarchy_IMG_20130205.jpg
* @see http://en.wiktionary.org/wiki/Template_talk:context
* @see http://en.wiktionary.org/wiki/Wiktionary:Entry_layout_explained
* @see http://en.wiktionary.org/wiki/Template:context
*/
public abstract class Label {
/** Label itself, e.g. {{obsolete}}, {{slang}}. */
protected String short_name;
/** Label name, e.g. 'New Zealand' for {{NZ}}. */
protected String name;
/** Weather the label was added manually to the code of wikokit, or it was gathered automatically by parser. */
// protected boolean added_by_hand; // true : added_by_hand == true if LabelCategory is not NULL in database,
// false: automatically i.e. some labels extracted from {{context|some label}}
/** Constructor for labels added by hand, @see list in LabelEn, LabelRu, etc. */
protected Label(String short_name, String name) {
if(short_name.length() == 0 || name.length() == 0)
System.out.println("Error in Label.Label(): one of parameters is empty! label="+short_name+"; name=\'"+name+"\'.");
this.short_name = short_name;
this.name = name;
}
/** Constructor for new context labels which are extracted by parser
* (1) from the template {{context|new label}} or
* (2) from semantic relations' labels (word (label), in ruwikt)
* and added automatically,
* these new labels are not listed in the LabelEn.
*
* @param short_name name of the found of context label
*/
public Label(String short_name) {
if(short_name.length() == 0)
System.out.println("Error in Label.Label(String short_name): label short_name is empty (\"\")!");
this.short_name = short_name;
this.name = "";
}
/** Checks whitespace characters in the prefix or suffix of a string.
* Prints "error" message if there is any.
*/
protected static void checksPrefixSuffixSpace(String s) {
if(s.charAt(0) == ' ' || s.charAt(s.length()-1) == ' ')
System.out.println("Error in Label.checksPrefixSuffixSpace(): there are leading spaces, string='"+s+"'.");
}
/** Gets English Wiktionary context label (LabelEn) associated with this label (e.g. LabelRu, LabelFr, etc.). */
abstract public LabelEn getLinkedLabelEn();
/** Sets LabelCategory for LabelLocal. */
abstract public void setCategory(LabelCategory _category);
abstract public LabelCategory getCategory();
@Override
public String toString() { return short_name; }
/** Gets label itself (short name). */
public String getShortName() {
return short_name;
}
/** Gets label itself (short name) in English.
* This functions is needed for comparison (equals()) with LabelLocal labels.
*/
abstract public String getShortNameEnglish();
/** Gets label full name. */
public String getName() {
return name;
}
/** Checks weather the label was added manually to the code of wikokit, or was gathered automatically by parser. */
/*public boolean getAddedByHand() {
return added_by_hand;
}*/
/** Gets (full) name of context label by label object.
*
* It is supposed that (1) number of context labels in enwikt > in ruwikt,
* (2) context labels in enwikt cover (include) labels in all other wiktionaries.
* So, first
*
* @param label
* @return
*/
/*public static String getName (Label label) {
String s = label2name.get(label);
if(null == s) // e.g. LabelRu don't has "dated_sense" label
return label.getName(); // if there is no translation into local language, then English name
return s;
}*/
/** Gets all labels. */
// ? abstract public static Collection<Label> getAllLabels();
/** Counts number of labels. */
// ?? abstract public static int size();
/** Gets all names of labels (short name). */
// ??? abstract public static Set<String> getAllLabelShortNames();
/** @return true if short name of two labels are the same. */
static public boolean equals (Label one, Label two) {
// !attention: non enwikt context labels added automatically have .getShortNameEnglish() == null :(
String en1 = one.getShortNameEnglish();
String en2 = two.getShortNameEnglish();
if(null != en1 && null != en2) // both labels English names are not null
return one.getShortNameEnglish().equals( two.getShortNameEnglish() );
if(null == en1 || null == en2) // i.e. one label English name is null, another is not null
return false;
// both labels English names are null, so we cannot compare English names
return one.short_name.equals( two.short_name );
}
/** The set of unknown labels, which were found during parsing.
* It should be only one message for one unknown label (for concise logging).
*/
private static Set<String> unknown_label = new HashSet<String>();
/** Checks weather exists the unknown label 'label'. */
public static boolean hasUnknownLabel(String label) {
return unknown_label.contains(label);
}
/** Adds unknown language code 'code'. */
public static boolean addUnknownLabel(String label) {
return unknown_label.add(label);
}
/** Gets all labels. */
public static Collection<Label> getAllLabels(LanguageType lang_code) {
Collection<Label> result;
LanguageType l = lang_code;
if(l == LanguageType.en) {
result = LabelEn.getAllLabels();
} else if(l == LanguageType.ru) {
result = LabelRu.getAllLabels();
//} //else if(code.equalsIgnoreCase( "simple" )) {
// todo
// ...
} else {
throw new NullPointerException("Exception in Label.getAllLabels(): Null LanguageType");
}
return result;
}
/** Counts number of labels. */
public static int size(LanguageType lang_code) {
int result;
LanguageType l = lang_code;
if(l == LanguageType.en) {
result = LabelEn.size();
} else if(l == LanguageType.ru) {
result = LabelRu.size();
//} //else if(code.equalsIgnoreCase( "simple" )) {
// todo
// ...
} else {
throw new NullPointerException("Exception in Label.size(): Null LanguageType");
}
return result;
}
/** Gets all names of labels (short name). */
public static Set<String> getAllLabelShortNames(LanguageType lang_code) {
Set<String> result;
LanguageType l = lang_code;
if(l == LanguageType.en) {
result = LabelEn.getAllLabelShortNames();
} else if(l == LanguageType.ru) {
result = LabelRu.getAllLabelShortNames();
//} //else if(code.equalsIgnoreCase( "simple" )) {
// todo
// ...
} else {
throw new NullPointerException("Exception in Label.getAllLabelShortNames(): Null LanguageType");
}
return result;
}
/** Checks weather exists the Label (short name) by its name, checks synonyms also. */
public static boolean hasShortName(String short_name, LanguageType lang_code) {
boolean result;
LanguageType l = lang_code;
if(l == LanguageType.en) {
result = LabelEn.hasShortName(short_name);
} else if(l == LanguageType.ru) {
result = LabelRu.hasShortName(short_name);
//} else if(l == LanguageType.??) {
// todo
// ...
} else {
throw new NullPointerException("Exception in Label.hasShortName(): Null LanguageType");
}
return result;
}
/** Gets label by short name of the label. */
public static Label getByShortName(String short_name, LanguageType lang_code) {
Label result;
LanguageType l = lang_code;
if(l == LanguageType.en) {
result = LabelEn.getByShortName(short_name);
} else if(l == LanguageType.ru) {
result = LabelRu.getByShortName(short_name);
//} else if(l == LanguageType.??) {
// todo
// ...
} else {
throw new NullPointerException("Exception in Label.getByShortName(): Null LanguageType");
}
return result;
}
}