package wikokit.base.wikt.multi.en;
import wikokit.base.wikt.multi.en.WRelationEn;
import java.util.Map;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
import wikokit.base.wikipedia.language.LanguageType;
import wikokit.base.wikt.constant.Relation;
import wikokit.base.wikt.util.POSText;
import wikokit.base.wikt.word.WRelation;
import wikokit.base.wikt.constant.POS;
import wikokit.base.wikt.util.LabelsWikiText;
import wikokit.base.wikt.util.WikiText;
public class WRelationEnTest {
public static String test_hrunk, word_text, flower_text,
empty_relation,
empty_relation2,
bark, man, man2, women, nationality, airplane,
Suomija_template_l, poljento_template_l;
// todo parse: man
public WRelationEnTest() {
}
@BeforeClass
public static void setUpClass() throws Exception {
}
@AfterClass
public static void tearDownClass() throws Exception {
}
@Before
public void setUp() {
test_hrunk =
"# Definition hrunk 1.\n" +
"# Definition hrunk 2.\n" +
"\n" +
"====Synonyms====\n" +
"* (''flrink with cumplus''): [[flrink]], [[pigglehick]]\n" +
"* (''furp''): [[furp]], [[whoodleplunk]]";
word_text =
"{{en-noun}}\n" +
"\n" +
"# {{linguistics}} A distinct unit of language\n" +
"# A distinct unit of language which is approved by some authority.\n" +
"# Something [[promise]]d, (as in a [[contract]] or [[oath]]).\n" +
"# [[news|News]]; [[tidings]].\n" +
"# {{theology|sometimes '''[[Word]]'''}} [[God]].\n" +
"#* See [[s:Bible (King James)/1 John|Bible, King James, John 1]]\n" +
"# {{theology|sometimes '''[[Word]]'''}} The [[bible|Bible]].\n" +
"\n" +
"====Usage notes====\n" +
"* \n" +
"====Synonyms====\n" +
"* {{sense|distinct unit of language}} [[vocable]]\n" +
"* {{sense|something promised}} [[promise]]\n" +
"* {{sense|God}} [[God]], [[logos|Logos]]\n" +
"* {{sense|Bible}} [[word of God]], [[Bible]]\n" +
"* See also [[Wikisaurus:word]]\n" +
"\n" +
"====Translations====\n" +
"{{trans-top|unit of language}}\n" +
"* [[Afrikaans]]: [[woord]]\n" +
"\n";
flower_text = "====Synonyms====\n" +
"* {{sense|inflorescence that resembles a flower}} [[head]], [[pseudanthium]]\n" +
"* {{sense|best examples}} [[cream]]\n" +
"* {{sense|best state of things}} [[prime]]\n" +
"\n";
empty_relation = "====Hyponyms====\n" +
"* See also [[Wikisaurus:tree]]\n" +
"\n";
empty_relation2 = // human
"{{wikipedia}}\n" +
"{{en-noun}}\n" +
"\n" +
"# A [[human being]], whether [[man]], [[woman]] or [[child]].\n" +
"\n" +
"====Translations====\n";
bark = "{{en-verb}}\n" +
"=====Related terms=====\n" +
"* [[barking]]\n" +
"\n" +
"\n" +
"=====Synonyms=====\n" +
"* [[latrate]] {{i|obsolete}}\n" +
"\n" +
"=====Translations=====\n";
/* todo
man = "{{en-noun|men}}\n" +
"\n" +
"# An adult [[male]] [[human]].\n" +
"# A [[mensch]]; a person of [[integrity]] and [[honor]].\n" +
"#* '''1883''', [[w:Robert Louis Stevenson|Robert Louis Stevenson]], ''[[wikisource:Treasure Island|Treasure Island]]''\n" +
"#*: ''He's more a '''man''' than any pair of rats of you in this here house...''\n" +
"# An abstract [[person]]; a person of either gender, usually an adult.\n" +
"#: ''every '''man''' for himself''\n" +
"# {{context|collective}} All humans collectively; [[mankind]]. Also [[Man]].\n" +
"#: ''prehistoric '''man'''''\n" +
"# A [[piece]] or [[token]] used in board games such as [[chess]].\n" +
"# A [[professional]] person.\n" +
"#: ''We'll have to call a '''man''' in to fix it\n" +
"\n" +
"====Synonyms====\n" +
"* {{sense|adult male human}} [[omi]] {{qualifier|Polari}}\n" +
"\n" +
"====Antonyms====\n" +
"* [[woman]]\n" +
"* [[boy]]\n" +
"\n" +
"====Derived terms====\n";
*/
man2 = "# {{colloquial|lang=fo}} [[one]], [[they]] {{i|indefinite third person singular pronoun}}\n" +
"\n" +
"====Synonyms====\n" +
"* {{sense|standard}} [[mann]]\n" +
"\n" +
"----\n";
nationality = "{{en-noun|nationalit|ies}}\n" +
"\n" +
"# {{rfc-sense}} Membership of a particular [[nation]] or [[state]], by origin, birth, naturalization, ownership, allegiance or otherwise.\n" +
"# National, i.e. [[ethnic]] and/or cultural, character or identity.\n" +
"\n" +
"====Synonyms====\n" +
"* {{sense|membership of a nation or state}} [[affiliation]], [[allegiance]], [[ancestry]], [[citizenship]], [[descent]], [[enfranchisement]], [[ethnicity]], [[national status]], [[naturalization]], [[origin]], [[parentage]], [[race]], [[residence]], [[status]]\n" +
"* {{sense|national character or identity}} [[ancestry]], [[color]], [[colour]], [[ethnicity]], [[identity]], [[origin]]\n" +
"\n" +
"====Related terms====\n" +
"* [[nation]], [[national]], [[nationalise]], [[nationalism]], [[nationalist]], [[nationalistic]], [[nationalize]]\n" +
"* [[nationhood]]\n" +
"\n" +
"====See also====\n" +
"* [[related term]]\n" +
"\n";
women = "{{wikipedia}}\n" +
"{{en-noun|women}}\n" +
"\n" +
"# An [[adult]] [[female]] [[human being]].\n" +
"\n" +
"====Synonyms====\n" +
"See [[Wikisaurus:woman]]\n" +
"* [[female]]\n" +
"* [[lady]]\n" +
"\n" +
"====Antonyms====\n" +
"* [[girl]]\n" +
"* [[man]]\n" +
"\n" +
"====Derived terms====\n" +
"\n";
airplane = "{{en-noun}}\n" +
"# {{US}} A powered heavier-than air [[aircraft]] with fixed [[wing]]s.\n" +
"\n" +
"====Synonyms====\n" +
"* [[aeroplane]].\n" +
"\n" +
"====Translations====\n" +
"\n";
Suomija_template_l = "{{lt-proper noun|f}}\n" +
"\n" +
"# [[Finland]]\n" +
"\n" +
"====Synonyms====\n" +
"* {{l|lt|Suomijos Respublika}}\n" +
"\n";
poljento_template_l = "{{fi-noun}}\n" +
"\n" +
"# [[rhythm]]\n" +
"\n" +
"====Synonyms====\n" +
"* {{l|la|gustus|gustūs}}, {{l|fi|tahti}}\n" +
"\n" +
"[[io:poljento]]\n";
}
@After
public void tearDown() {
}
@Test
public void testParse_hrunk() {
System.out.println("parse_hrunk");
WRelation[] r;
String str;
LanguageType wikt_lang = LanguageType.en; // English Wiktionary
String page_title = "test_hrunk";
POSText pt = new POSText(POS.noun, test_hrunk);
Map<Relation, WRelation[]> result = WRelationEn.parse(wikt_lang, page_title, pt);
assertTrue(result.size() > 0);
assertTrue(result.containsKey(Relation.synonymy));
// ====Synonyms====
// * (''flrink with cumplus''): [[flrink]], [[pigglehick]]
// * (''furp''): [[furp]], [[whoodleplunk]]
r = result.get(Relation.synonymy);
assertEquals(2, r.length);
str = r[0].getMeaningSummary();
assertNotNull(str);
assertTrue(str.equalsIgnoreCase("flrink with cumplus"));
str = r[1].getMeaningSummary();
assertNotNull(str);
assertTrue(str.equalsIgnoreCase("furp"));
LabelsWikiText[] synonym_row_0 = r[0].get();
assertEquals(2, synonym_row_0.length);
assertTrue(synonym_row_0[0].getWikiText().getVisibleText().equalsIgnoreCase("flrink"));
assertTrue(synonym_row_0[1].getWikiText().getWikiWords()[0].getWordLink().equalsIgnoreCase("pigglehick"));
LabelsWikiText[] synonym_row_1 = r[1].get();
assertEquals(2, synonym_row_0.length);
assertTrue(synonym_row_1[0].getWikiText().getVisibleText().equalsIgnoreCase("furp"));
assertTrue(synonym_row_1[1].getWikiText().getWikiWords()[0].getWordLink().equalsIgnoreCase("whoodleplunk"));
/* flower_text */
page_title = "flower";
pt = new POSText(POS.noun, flower_text);
result = WRelationEn.parse(wikt_lang, page_title, pt);
assertTrue(result.size() > 0);
assertTrue(result.containsKey(Relation.synonymy));
// ====Synonyms====
// * {{sense|inflorescence that resembles a flower}} [[head]], [[pseudanthium]]
// * {{sense|best examples}} [[cream]]
// * {{sense|best state of things}} [[prime]]
r = result.get(Relation.synonymy);
assertEquals(3, r.length);
str = r[0].getMeaningSummary();
assertNotNull(str);
assertTrue(str.equalsIgnoreCase("inflorescence that resembles a flower"));
str = r[1].getMeaningSummary();
assertNotNull(str);
assertTrue(str.equalsIgnoreCase("best examples"));
synonym_row_0 = r[0].get();
assertEquals(2, synonym_row_0.length);
assertTrue(synonym_row_0[0].getWikiText().getVisibleText().equalsIgnoreCase("head"));
assertTrue(synonym_row_0[1].getWikiText().getWikiWords()[0].getWordLink().equalsIgnoreCase("pseudanthium"));
}
/* # {{colloquial|lang=fo}} [[one]], [[they]] {{i|indefinite third person singular pronoun}}
*
* ====Synonyms====
* * {{sense|standard}} [[mann]]
*
* ---- (Dividing line between languages)
*/
@Test
public void testParse_with_Dividing_line() {
System.out.println("parse_with_Dividing_line");
WRelation[] r;
String str;
LanguageType wikt_lang = LanguageType.en; // English Wiktionary
String page_title = "test_man2";
POSText pt = new POSText(POS.noun, man2);
Map<Relation, WRelation[]> result = WRelationEn.parse(wikt_lang, page_title, pt);
assertTrue(result.size() > 0);
assertTrue(result.containsKey(Relation.synonymy));
// ====Synonyms====
// * {{sense|standard}} [[mann]]
//
// ----
r = result.get(Relation.synonymy);
assertEquals(1, r.length);
str = r[0].getMeaningSummary();
assertNotNull(str);
assertTrue(str.equalsIgnoreCase("standard"));
LabelsWikiText[] synonym_row_0 = r[0].get();
assertEquals(1, synonym_row_0.length);
assertTrue(synonym_row_0[0].getWikiText().getVisibleText().equalsIgnoreCase("mann"));
assertTrue(synonym_row_0[0].getWikiText().getWikiWords()[0].getWordLink().equalsIgnoreCase("mann"));
}
/* Tests wrong order: "Related terms" should be after "Synonyms" in really,
* but sometimes:
* {{en-verb}}
* =====Related terms=====
* * [[barking]]
*
* =====Synonyms=====
* * [[latrate]] {{i|obsolete}}
*
* =====Translations=====
* */
@Test
public void testParse_sections_wrong_order() {
System.out.println("parse_sections_wrong_order");
WRelation[] r;
String str;
LanguageType wikt_lang = LanguageType.en; // English Wiktionary
String page_title = "bark";
POSText pt = new POSText(POS.noun, bark);
Map<Relation, WRelation[]> result = WRelationEn.parse(wikt_lang, page_title, pt);
assertTrue(result.size() > 0);
assertTrue(result.containsKey(Relation.synonymy));
// ====Synonyms====
// * [[latrate]] {{i|obsolete}}
r = result.get(Relation.synonymy);
assertEquals(1, r.length);
str = r[0].getMeaningSummary();
assertNull(str);
LabelsWikiText[] synonym_row_0 = r[0].get();
assertEquals(1, synonym_row_0.length);
assertTrue(synonym_row_0[0].getWikiText().getVisibleText().equalsIgnoreCase("latrate {{i|obsolete}}"));
assertTrue(synonym_row_0[0].getWikiText().getWikiWords()[0].getWordLink().equalsIgnoreCase("latrate"));
// TODO label: obsolete
// ...
}
/* Tests the wrong order: "See also" is located after "Related terms", so it
* should be skipped, since "See also" can be used not only for semantic
* relations, but also for etymologically related words
*
* ====Synonyms====
*
* =====Related terms=====
*
* ====See also====
* * [[related term]]
* */
@Test
public void testParse_sections_wrong_order_for_See_also() {
System.out.println("parse_sections_wrong_order_for_See_also");
LanguageType wikt_lang = LanguageType.en; // English Wiktionary
String page_title = "nationality";
POSText pt = new POSText(POS.noun, nationality);
Map<Relation, WRelation[]> result = WRelationEn.parse(wikt_lang, page_title, pt);
assertEquals(1, result.size());
assertTrue(result.containsKey(Relation.synonymy));
}
// * [[aeroplane]]. -> aeroplane
@Test
public void testParse_skip_dot_after_synonym() {
System.out.println("parse__skip_dot_after_synonym");
WRelation[] r;
String str;
LanguageType wikt_lang = LanguageType.en; // English Wiktionary
String page_title = "airplane";
POSText pt = new POSText(POS.noun, airplane);
Map<Relation, WRelation[]> result = WRelationEn.parse(wikt_lang, page_title, pt);
assertTrue(result.size() > 0);
assertTrue(result.containsKey(Relation.synonymy));
// ====Synonyms====
// * [[aeroplane]].
r = result.get(Relation.synonymy);
assertEquals(1, r.length);
str = r[0].getMeaningSummary();
assertNull(str);
LabelsWikiText[] synonym_row_0 = r[0].get();
assertEquals(1, synonym_row_0.length);
assertTrue(synonym_row_0[0].getWikiText().getVisibleText().equalsIgnoreCase("aeroplane"));
assertTrue(synonym_row_0[0].getWikiText().getWikiWords()[0].getWordLink().equalsIgnoreCase("aeroplane"));
}
/** The relation section may absent..., e.g.:
*
* {{wikipedia}}
* {{en-noun}}
*
* # A [[human being]], whether [[man]], [[woman]] or [[child]].
*
* ====Translations====
*/
@Test
public void testParse_empty_relation_section() {
System.out.println("parse_empty_relation_section");
LanguageType wikt_lang = LanguageType.en; // English Wiktionary
String page_title = "human_empty_relation";
POSText pt = new POSText(POS.noun, empty_relation2);
Map<Relation, WRelation[]> result = WRelationEn.parse(wikt_lang, page_title, pt);
assertEquals(0, result.size());
}
// Template:l should be parsed, and synonyms should be extracted from this template.
// @see http://en.wiktionary.org/wiki/Template:l
@Test
public void testParse_template_l_with_one_synonym() {
System.out.println("parse__template_l_with_one_synonym");
WRelation[] r;
String str;
LanguageType wikt_lang = LanguageType.en; // English Wiktionary
String page_title = "Suomija";
POSText pt = new POSText(POS.proper_noun, Suomija_template_l);
Map<Relation, WRelation[]> result = WRelationEn.parse(wikt_lang, page_title, pt);
assertTrue(result.size() > 0);
assertTrue(result.containsKey(Relation.synonymy));
// ====Synonyms====
// * {{l|lt|Suomijos Respublika}}
r = result.get(Relation.synonymy);
assertEquals(1, r.length);
str = r[0].getMeaningSummary();
assertNull(str);
LabelsWikiText[] synonym_row_0 = r[0].get();
assertEquals(1, synonym_row_0.length);
assertTrue(synonym_row_0[0].getWikiText().getVisibleText().equalsIgnoreCase("Suomijos Respublika"));
assertTrue(synonym_row_0[0].getWikiText().getWikiWords()[0].getWordLink().equalsIgnoreCase("Suomijos Respublika"));
}
@Test
public void testParse_template_l_with_two_synonyms() {
System.out.println("parse__template_l_with_two_synonyms");
WRelation[] r;
String str;
LanguageType wikt_lang = LanguageType.en; // English Wiktionary
String page_title = "poljento";
POSText pt = new POSText(POS.noun, poljento_template_l);
Map<Relation, WRelation[]> result = WRelationEn.parse(wikt_lang, page_title, pt);
assertTrue(result.size() > 0);
assertTrue(result.containsKey(Relation.synonymy));
// ====Synonyms====
// * {{l|la|gustus|gustūs}}, {{l|fi|tahti}}
r = result.get(Relation.synonymy);
assertEquals(1, r.length);
str = r[0].getMeaningSummary();
assertNull(str);
LabelsWikiText[] synonym_row = r[0].get();
assertEquals(2, synonym_row.length);
assertTrue(synonym_row[0].getWikiText().getVisibleText().equalsIgnoreCase("gustus"));
assertTrue(synonym_row[0].getWikiText().getWikiWords()[0].getWordLink().equalsIgnoreCase("gustus"));
assertTrue(synonym_row[1].getWikiText().getVisibleText().equalsIgnoreCase("tahti"));
assertTrue(synonym_row[1].getWikiText().getWikiWords()[0].getWordLink().equalsIgnoreCase("tahti"));
}
/** Let's skip now the link to Wikisaurus, e.g.:
* ====Synonyms====
* * See also [[Wikisaurus:word]]
*
* In future: parse Wikisaurus
*/
@Test
public void testParse_Wikisaurus_link() {
System.out.println("parse_Wikisaurus_link");
WRelation[] r;
String str;
LanguageType wikt_lang = LanguageType.en; // English Wiktionary
String page_title = "word";
POSText pt = new POSText(POS.noun, word_text);
Map<Relation, WRelation[]> result = WRelationEn.parse(wikt_lang, page_title, pt);
assertEquals(1, result.size());
assertTrue(result.containsKey(Relation.synonymy));
// ====Synonyms====
// * {{sense|distinct unit of language}} [[vocable]]
// * {{sense|something promised}} [[promise]]
// * {{sense|God}} [[God]], [[logos|Logos]]
// * {{sense|Bible}} [[word of God]], [[Bible]]
// * See also [[Wikisaurus:word]] // it's a 5th line of synonyms
r = result.get(Relation.synonymy);
assertEquals(4, r.length);
}
/** Wrong location of Wikisaurus link: at first row (it should be in the last):
* ====Synonyms====
* See [[Wikisaurus:woman]]
* * [[female]]
* * [[lady]]
*/
@Test
public void testParse_Wikisaurus_link_at_first_row() {
System.out.println("parse_Wikisaurus_link_at_first_row");
WRelation[] r;
LanguageType wikt_lang = LanguageType.en; // English Wiktionary
String page_title = "women";
POSText pt = new POSText(POS.noun, women);
Map<Relation, WRelation[]> result = WRelationEn.parse(wikt_lang, page_title, pt);
assertEquals(2, result.size()); // syn + ant
assertTrue(result.containsKey(Relation.synonymy));
// ====Synonyms====
// See [[Wikisaurus:woman]]
// * [[female]]
// * [[lady]]
// ====Antonyms====
// * [[girl]]
// * [[man]]
r = result.get(Relation.synonymy);
assertEquals(2, r.length); // female and lady
}
/** Let's skip now the link to Wikisaurus, e.g.:
* ====Hyponyms====
* * See also [[Wikisaurus:tree]]
*
* In future: parse Wikisaurus
*/
@Test
public void testParse_empty_relation_with_Wikisaurus_link() {
System.out.println("parse_empty_relation_with_Wikisaurus_link");
WRelation[] r;
String str;
LanguageType wikt_lang = LanguageType.en; // English Wiktionary
String page_title = "word_empty_relation";
POSText pt = new POSText(POS.noun, empty_relation);
Map<Relation, WRelation[]> result = WRelationEn.parse(wikt_lang, page_title, pt);
assertEquals(0, result.size());
// todo:
// The link to Wikisaurus is ommited now... to parse in future.
//assertTrue(result.containsKey(Relation.hyponymy));
// ====Hyponyms====
// * See also [[Wikisaurus:tree]]
//r = result.get(Relation.hyponymy);
//assertEquals(1, r.length);
}
}