package wikokit.base.wikt.multi.ru; import wikokit.base.wikt.multi.ru.WPOSRu; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.*; import wikokit.base.wikt.util.POSText; import wikokit.base.wikt.util.LangText; import wikokit.base.wikipedia.language.LanguageType; import wikokit.base.wikt.constant.POS; public class WPOSRuTest { //Connect connect_ruwikt; // , connect_enwikt, connect_simplewikt; public WPOSRuTest() { } @BeforeClass public static void setUpClass() throws Exception { } @AfterClass public static void tearDownClass() throws Exception { } @Before public void setUp() { //connect_ruwikt = new Connect(); //connect_ruwikt.Open(Connect.RUWIKT_HOST,Connect.RUWIKT_DB,Connect.RUWIKT_USER,Connect.RUWIKT_PASS,LanguageType.ru); } @After public void tearDown() { //connect_ruwikt.Close(); } /* POS defined by the template {{заголовок|be|add=I}}, * where "add" is not empty. * * {{-be-}} * {{заголовок|be|add=I}} * {{заголовок|be|add=II}} */ @Test public void testSplitToPOSSections_add_parameter() { System.out.println("splitToPOSSections_add_parameter"); String str, s1, s2; POSText[] result; LangText lt; lt = new LangText(LanguageType.be); // two POS in {{-en-}} in Russian Wiktionary s1 = "Before \n" + "{{заголовок|be|add=I}}\n" + "=== Морфологические и синтаксические свойства ===\n" + "{{сущ be m|слоги={{по-слогам|шах}}|}}\n" + "\n"; s2 = "{{заголовок|be|add=II}}\n" + "===Морфологические и синтаксические свойства===\n" + "{{сущ be m|слоги={{по-слогам|}}|}}\n" + "\n"; str = s1 + s2; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("шах", lt); assertEquals(2, result.length); assertEquals(POS.noun, result[0].getPOSType()); assertEquals(POS.noun, result[1].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(s1)); assertTrue(result[1].getText().toString().equalsIgnoreCase(s2)); // check of error: language code "en" != "be" lt = new LangText(LanguageType.en); s1 = "Before \n" + "{{заголовок|be|add=I}}\n" + "=== Морфологические и синтаксические свойства ===\n" + "{{сущ be m|слоги={{по-слогам|шах}}|}}\n" + "\n"; s2 = "{{заголовок|be|add=II}}\n" + "===Морфологические и синтаксические свойства===\n" + "{{сущ be m|слоги={{по-слогам|}}|}}\n" + "\n"; str = s1 + s2; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("шах", lt); assertEquals(1, result.length); } /* POS defined by the template {{заголовок|add=I}}, * where there are only two parameters * * = {{-ru-}} = * {{заголовок|add=I}} * {{заголовок|add=II}} */ @Test public void testSplitToPOSSections_add_parameter_without_lang_parameter() { System.out.println("splitToPOSSections_add_parameter_without_lang_parameter"); String str, s1, s2; POSText[] result; LangText lt; lt = new LangText(LanguageType.ru); // two POS in {{-en-}} in Russian Wiktionary s1 = "Before \n" + "{{заголовок|add=I}}\n" + "=== Морфологические и синтаксические свойства ===\n" + "{{гл ru 12aСВ}}\n" + "\n"; s2 = "{{заголовок|add=II}}\n" + "===Морфологические и синтаксические свойства===\n" + "{{гл ru 12aСВ}}\n" + "\n"; str = s1 + s2; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("вздуть", lt); assertEquals(2, result.length); assertEquals(POS.verb, result[0].getPOSType()); assertEquals(POS.verb, result[1].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(s1)); assertTrue(result[1].getText().toString().equalsIgnoreCase(s2)); } /* POS defined by the template {{заголовок|add=(прилагательное)}}, * where there are only two parameters, * and second parameter in brackets is POS * * = {{-ru-}} = * {{заголовок|add=(прилагательное)}} * {{заголовок|add=(cуществительное)}} */ @Test public void testSplitToPOSSections_name_in_brackets() { System.out.println("splitToPOSSections_name_in_brackets"); String str, s1, s2; POSText[] result; LangText lt; lt = new LangText(LanguageType.ru); // two POS in {{-ru-}} in Russian Wiktionary s1 = "Before \n" + "{{заголовок|add=(прилагательное)}}\n" + "=== Морфологические и синтаксические свойства ===\n" + "{{прил ru 1bX}}\n" + "\n"; s2 = "{{заголовок|add=(cуществительное)}}\n" + "===Морфологические и синтаксические свойства===\n" + "{{сущ ru m a (п 1b)}}\n" + "\n"; str = s1 + s2; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("голубой", lt); assertEquals(2, result.length); assertEquals(POS.adjective, result[0].getPOSType()); assertEquals(POS.noun, result[1].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(s1)); assertTrue(result[1].getText().toString().equalsIgnoreCase(s2)); } @Test public void guessPOS_till_hyphen_not_space() { System.out.println("testGuessPOS_till_hyphen_not_space"); String str, page_title; LangText lt; POSText pt; // particle page_title = "ага"; lt = new LangText(LanguageType.en); str = "Before \n" + "== ага́ I ==\n" + "=== Морфологические и синтаксические свойства ===\n" + "{{part-ru|}}\n" + "\n"; lt.text = new StringBuffer(str); pt = WPOSRu.guessPOS(lt.text); assertEquals(POS.particle, pt.getPOSType()); // adverb page_title = "добро"; lt = new LangText(LanguageType.en); str = "Before \n" + "== добро II ==\n" + "=== Морфологические и синтаксические свойства ===\n" + "{{adv-ru|до́б-ро}}\n" + "\n"; lt.text = new StringBuffer(str); pt = WPOSRu.guessPOS(lt.text); assertEquals(POS.adverb, pt.getPOSType()); } @Test public void guessPOS_one_more_line_between_header_and_POS_template() { System.out.println("testGuessPOS_one_more_line_between_header_and_POS_template"); String str, page_title; LangText lt; POSText pt; page_title = "шум"; lt = new LangText(LanguageType.en); str = "Before \n" + "=== Морфологические и синтаксические свойства ===\n" + "В значениях «беспорядочный звук», «обсуждения», «ссора»\n" + "{{сущ ru m ina 1a\n" + "\n"; lt.text = new StringBuffer(str); pt = WPOSRu.guessPOS(lt.text); assertEquals(POS.noun, pt.getPOSType()); } @Test public void guessPOSWith2ndLevelHeader() { System.out.println("testGuessPOSWith2ndLevelHeader"); String str, s1, s2, page_title; POS result; StringBuffer s; LangText lt; // I. Russian words in Russian Wiktionary // todo ... // + old format: ==Существительное== или ===Существительное=== // todo ... // II. English words in Russian Wiktionary // noun page_title = "bar"; lt = new LangText(LanguageType.en); str = "== bar II ==\n" + "===Морфологические и синтаксические свойства===\n" + "{{сущ en|nom-sg=bar|слоги=bar}}"; lt.text = new StringBuffer(str); result = WPOSRu.guessPOSWith2ndLevelHeader(page_title, "bar II", lt.text); assertEquals(POS.noun, result); // one more noun (old version?) page_title = "адджындзинад"; lt = new LangText(LanguageType.en); str = "===Морфологические и синтаксические свойства===\n" + "{{падежи os|nom-sg={{PAGENAME}}}}"; lt.text = new StringBuffer(str); result = WPOSRu.guessPOSWith2ndLevelHeader(page_title, "", lt.text); assertEquals(POS.noun, result); // adjective page_title = "round"; lt = new LangText(LanguageType.en); str = "== round I ==\n" + "===Морфологические и синтаксические свойства===\n" + "{{прил en|round|слоги=round}}"; lt.text = new StringBuffer(str); result = WPOSRu.guessPOSWith2ndLevelHeader(page_title, "round I", lt.text); assertEquals(POS.adjective, result); // adverb (old style for "fast"): page_title = "fast"; lt = new LangText(LanguageType.en); str = "==Наречие==\n" + "{{нар en|fast}}"; lt.text = new StringBuffer(str); result = WPOSRu.guessPOSWith2ndLevelHeader(page_title, "Наречие", lt.text); assertEquals(POS.adverb, result); // Verb III (old style for "отделять"): page_title = "отделять"; lt = new LangText(LanguageType.ru); str = "== Глагол I ==\n" + "# отделять"; lt.text = new StringBuffer(str); result = WPOSRu.guessPOSWith2ndLevelHeader(page_title, "Глагол I", lt.text); assertEquals(POS.verb, result); // adverb (very old style for DE "fast") page_title = "fast"; lt = new LangText(LanguageType.en); // ? de or ru ? str = "<b>fast</b>\n" + "Наречие\n" + "==Произношение==\n" + "{{transcription|fɑst}}\n" + "==Значение==\n" + "[[почти]]"; lt.text = new StringBuffer(str); result = WPOSRu.guessPOSWith2ndLevelHeader(page_title, "Произношение", lt.text); // assertEquals(POS.adjective, result); too complex now assertEquals(null, result); // too complex now: POS.unknown } @Test public void guessPOSWith2ndLevelHeader_POS_header_unknown_but_not_null() { System.out.println("splitToPOSSections_POS_header_unknown_but_not_null"); String str, page_title; POS result; LangText lt; page_title = "round"; lt = new LangText(LanguageType.en); str = "== round I ==\n" + "===3 level header===\n" + "text which do not describe POS"; lt.text = new StringBuffer(str); result = WPOSRu.guessPOSWith2ndLevelHeader(page_title, "round I", lt.text); assertEquals(POS.unknown, result); // It's POS because "round I" == "round" + "I" } @Test public void guessPOSWith2ndLevelHeader_not_a_POS_header() { System.out.println("splitToPOSSections_not_a_POS_header"); String str, page_title; POS result; LangText lt; page_title = "bar"; lt = new LangText(LanguageType.en); str = "==References==\n" + "<references />"; lt.text = new StringBuffer(str); result = WPOSRu.guessPOSWith2ndLevelHeader(page_title, "bar", lt.text); assertEquals(null, result); } // == Ссылки == @Test public void testSplitToPOSSections_not_a_POS_header() { System.out.println("splitToPOSSections_not_a_POS_header"); String str, s1, s2; POSText[] result; StringBuffer s; LangText lt; lt = new LangText(LanguageType.en); // one POS in {{-os-}} in Russian Wiktionary s1 = "{{-os-}}\n" + "===Морфологические и синтаксические свойства===\n" + "{{падежи os\n" + "|nom-sg={{PAGENAME}}\n" + "|слоги={{по-слогам|а|.|га́}}\n" + "}}\n"; s2 = "== Ссылки ==\n" + "* Осетинско-русский словарь, 3-е дополненное издание, 1970 год\n" + "\n"; str = s1 + s2; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("ага", lt); assertEquals(1, result.length); assertEquals(POS.noun, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } @Test public void testSplitToPOSSections_english_words() { System.out.println("splitToPOSSections_english_words"); String str, s1, s2; POSText[] result; StringBuffer s; LangText lt; // I. Russian words in Russian Wiktionary // todo ... // + old format: ==Существительное== или ===Существительное=== // todo ... // II. English words in Russian Wiktionary // simple case: only one POS for English word "speak" {{-en-}} lt = new LangText(LanguageType.en); str = "\n ===Морфологические и синтаксические свойства===\n{{гл en irreg|speak|spoke|spoken}} text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.verb, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); // two POS in {{-en-}} in Russian Wiktionary s1 = "Before \n" + "== lead I ==\n" + "English text1 \n"; s2 = "== lead II== \n" + "===Морфологические и синтаксические свойства===\n" + "{{гл en reg|lead}}\n"; str = s1 + s2; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("lead", lt); assertEquals(2, result.length); assertEquals(POS.unknown, result[0].getPOSType()); assertEquals(POS.verb, result[1].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(s1)); assertTrue(result[1].getText().toString().equalsIgnoreCase(s2)); // one POS s2 in {{-en-}} in Russian Wiktionary str = s2; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("lead", lt); assertEquals(1, result.length); assertEquals(POS.verb, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(s2)); // + english words... // todo ... } ///////////////////////// // tests of different POS // noun // Фам - Surname (noun) @Test public void testSplitToPOSSections_POS_noun() { System.out.println("SplitToPOSSections_POS_noun"); String str, s1, s2, page_title; POSText[] result; LangText lt; // СущМужНеодуш - noun page_title = "полвека_test"; lt = new LangText(LanguageType.en); str = "===Морфологические и синтаксические свойства===\n" + "{{СущМужНеодуш-пол\n" + "|основа=ве́ка\n" + "}}\n"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections(page_title, lt); assertEquals(1, result.length); assertEquals(POS.noun, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); // Фам - Surname (noun) page_title = "Новак_test"; lt = new LangText(LanguageType.en); str = "===Морфологические и синтаксические свойства===\n" + "{{Фам \n" + "|основа=Новак\n" + "|слоги={{по-слогам|Но|вак}}\n" + "}}\n"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections(page_title, lt); assertEquals(1, result.length); assertEquals(POS.noun, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } // adjective @Test public void testSplitToPOSSections_POS_adjective() { System.out.println("SplitToPOSSections_POS_adjective"); String str, s1, s2, page_title; POSText[] result; StringBuffer s; LangText lt; // прил-сравн - adjective_comparative_degree page_title = "round"; lt = new LangText(LanguageType.en); str = "== round I ==\n" + "===Морфологические и синтаксические свойства===\n" + "{{прил-сравн ru|{{по-слогам|похле́ще}}}}"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections(page_title, lt); assertEquals(1, result.length); assertEquals(POS.adjective, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } @Test public void testSplitToPOSSections_ru_POS_pronoun() { System.out.println("splitToPOSSections_ru_POS_pronoun"); String str; POSText[] result; LangText lt; // Russian words in Russian Wiktionary // I.a pronoun: мест lt = new LangText(LanguageType.ru); str = "\n ===Морфологические и синтаксические свойства===\n{{мест ru 6*b\n}} text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.pronoun, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); // I.b pronoun: Мс (+ check uppercase, i.e. Мс == мс) lt = new LangText(LanguageType.ru); str = "\n ===Морфологические и синтаксические свойства===\n{{Мс-п6b\n}} text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.pronoun, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } @Test public void testSplitToPOSSections_ru_POS_numeral() { System.out.println("splitToPOSSections_ru_POS_numeral"); String str; POSText[] result; LangText lt; // a) numeral: числ-2 lt = new LangText(LanguageType.ru); str = "\n ===Морфологические и синтаксические свойства===\n{{числ-2\n}} text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.numeral, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); // b) numeral: числ lt = new LangText(LanguageType.ru); str = "\n ===Морфологические и синтаксические свойства===\n{{числ ru 3\n}} text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.numeral, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } @Test public void testSplitToPOSSections_ru_POS_conjunction() { System.out.println("splitToPOSSections_ru_POS_conjunction"); String str; POSText[] result; LangText lt; // conj conjunction lt = new LangText(LanguageType.ru); str = "\n ===Морфологические и синтаксические свойства===\n{{conj ru|противительный|слоги=но}}\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.conjunction, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } // interj // междометие // interj1 - глагольно-междометное слово - verb-interjection word @Test public void testSplitToPOSSections_ru_POS_interjection() { System.out.println("splitToPOSSections_ru_POS_interjection"); String str; POSText[] result; LangText lt; // interj lt = new LangText(LanguageType.ru); str = "\n ===Морфологические и синтаксические свойства===\n{{interj ru \n}}\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.interjection, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); // interj1 - глагольно-междометное слово - verb-interjection word lt = new LangText(LanguageType.ru); str = "\n ===Морфологические и синтаксические свойства===\n\n{{interj1 ru|слоги={{по-слогам|юрк}}}}\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.verb_interjection, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } // {{prep-ru|внутрь}} // {{prep ru|за}} text. @Test public void testSplitToPOSSections_ru_POS_preposition() { System.out.println("splitToPOSSections_ru_POS_preposition"); String str; POSText[] result; LangText lt; // {{prep-ru|внутрь}} lt = new LangText(LanguageType.ru); str = "\n ===Морфологические и синтаксические свойства===\n{{prep-ru|внутрь}}\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.preposition, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); // {{prep ru|за}} text. lt = new LangText(LanguageType.ru); str = "\n ===Морфологические и синтаксические свойства===\n\n{{prep ru|за}} text.\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.preposition, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } // {{article en|the}} // and {{art XX|}} @Test public void testSplitToPOSSections_ru_POS_article() { System.out.println("splitToPOSSections_ru_POS_article"); String str; POSText[] result; LangText lt; // {{art XX|}} lt = new LangText(LanguageType.ru); str = "\n ===Морфологические и синтаксические свойства===\n{{art de|}}\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.article, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); // {{article en|the}} lt = new LangText(LanguageType.ru); str = "\n ===Морфологические и синтаксические свойства===\n{{article en|the}}\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.article, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } // {{prefix ru|без}} // and {{suffix ru|ка|оконч=ть}} @Test public void testSplitToPOSSections_ru_POS_prefix_suffix() { System.out.println("splitToPOSSections_ru_POS_prefix_suffix"); String str; POSText[] result; LangText lt; // {{prep-ru|внутрь}} lt = new LangText(LanguageType.ru); str = "\n ===Морфологические и синтаксические свойства===\n{{prefix ru|без}}\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.prefix, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); // {{заголовок|add=(ь)}} // {{suffix ru|ен|оконч=ь}} lt = new LangText(LanguageType.ru); //str = "\n{{заголовок|add=(ь)}}\n{{suffix ru|ен|оконч=ь}}\n text1"; str = "\n ===Морфологические и синтаксические свойства===\n{{suffix ru|ен|оконч=ь}}\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.suffix, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } // phrase @Test public void testSplitToPOSSections_ru_POS_phrase() { System.out.println("splitToPOSSections_ru_POS_phrase"); String str; POSText[] result; LangText lt; lt = new LangText(LanguageType.ru); str = "\n=== Тип и синтаксические свойства сочетания ===\n{{phrase|\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.phrase, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); // === Тип и синтаксические свойства сочетания === // {{phrase // |тип= lt = new LangText(LanguageType.ru); str = "\n=== Тип и синтаксические свойства сочетания ===\n{{phrase\n|тип=\n}}\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.phrase, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } // abbrev - abbreviation @Test public void testSplitToPOSSections_ru_POS_abbreviation() { System.out.println("splitToPOSSections_ru_POS_abbreviation"); String str; POSText[] result; LangText lt; // {{abbrev|lang=en|роль=наречия}} lt = new LangText(LanguageType.ru); str = "\n=== Морфологические и синтаксические свойства ===\n\n{{abbrev|lang=en|роль=наречия}}\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.abbreviation, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } // predicative - Именная часть составного сказуемого, предикатив @Test public void testSplitToPOSSections_ru_POS_predicative() { System.out.println("splitToPOSSections_ru_POS_predicative"); String str; POSText[] result; LangText lt; // {{predic ru|{{по-слогам|пол|ны́м-пол|но́}}}} lt = new LangText(LanguageType.ru); str = "\n=== Морфологические и синтаксические свойства ===\n\n{{predic ru|{{по-слогам|пол|ны́м-пол|но́}}}}\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.predicative, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } // intro - parenthesis @Test public void testSplitToPOSSections_ru_POS_intro_parenthesis() { System.out.println("splitToPOSSections_ru_POS_intro_parenthesis"); String str; POSText[] result; LangText lt; lt = new LangText(LanguageType.ru); str = "\n=== Морфологические и синтаксические свойства ===\n\n{{intro ru|{{по-слогам|на|при|ме́р}}}}\n text1"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("test_word1", lt); assertEquals(1, result.length); assertEquals(POS.parenthesis, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } // adjectival_participle - Причастие @Test public void testSplitToPOSSections_POS_adjectival_participle() { System.out.println("splitToPOSSections_POS_adjectival_participle"); String str; POSText[] result; LangText lt; lt = new LangText(LanguageType.ru); str = "\n=== Морфологические и синтаксические свойства ===\n" + "{{прич ru 1a-т\n"+ "|основа=мя́\n"+ "}}"; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("мятый_word1", lt); assertEquals(1, result.length); //assertEquals(POS.adjectival_participle, result[0].getPOSType()); assertEquals(POS.participle, result[0].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(str)); } // eo tests of different POS //////////////////////////// /** additional_second_level_blocks * <references /> */ @Test public void testSplitToPOSSections_additional_second_level_blocks() { System.out.println("splitToPOSSections_additional_second_level_blocks"); String str, s1, s2, second_level_block; POSText[] result; StringBuffer s; LangText lt; lt = new LangText(LanguageType.en); // two POS in {{-en-}} in Russian Wiktionary s1 = "Before \n" + "== lead I ==\n" + "English text1 \n"; s2 = "== lead II== \n" + "===Морфологические и синтаксические свойства===\n" + "{{гл en reg|lead}}\n"; second_level_block = "==Примечания==\n" + "<references />"; str = s1 + s2 + second_level_block; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("lead", lt); assertEquals(2, result.length); assertEquals(POS.unknown, result[0].getPOSType()); assertEquals(POS.verb, result[1].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(s1)); assertTrue(result[1].getText().toString().equalsIgnoreCase(s2)); //assertTrue(result[1].text.toString().equalsIgnoreCase( s2.concat(second_level_block) )); // one POS in {{-en-}} in Russian Wiktionary s1 = "Before \n" + "== lead I ==\n" + "English text1 \n"; second_level_block = "==Примечания==\n" + "<references />"; str = s1 + second_level_block; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("lead", lt); assertEquals(1, result.length); assertEquals(POS.unknown, result[0].getPOSType()); assertTrue(s1.equalsIgnoreCase( result[0].getText().toString() )); } @Test public void testSplitToPOSSections_second_level_title_with_accent() { System.out.println("splitToPOSSections_second_level_title_with_accent"); String str, s1, s2, s3; POSText[] result; StringBuffer s; LangText lt; lt = new LangText(LanguageType.en); // two POS in {{-en-}} in Russian Wiktionary s1 = "Before \n" + "== ага́ I ==\n" + "=== Морфологические и синтаксические свойства ===\n" + "{{part-ru|}}\n" + "\n"; s2 = "== ага́ II ==\n" + "===Морфологические и синтаксические свойства===\n" + "{{сущ ru f a 3b\n" + "}}\n"; s3 = "== а́га ==\n" + "===Морфологические и синтаксические свойства===\n" + "{{сущ ru f a 3b\n" + "}}\n"; str = s1 + s2 + s3; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("ага", lt); assertEquals(3, result.length); assertEquals(POS.particle, result[0].getPOSType()); assertEquals(POS.noun, result[1].getPOSType()); assertEquals(POS.noun, result[2].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(s1)); assertTrue(result[1].getText().toString().equalsIgnoreCase(s2)); assertTrue(result[2].getText().toString().equalsIgnoreCase(s3)); } @Test public void testSplitToPOSSections_second_level_title_with_internal_brackets() { System.out.println("splitToPOSSections_second_level_title_with_internal_brackets"); String str, s1, s2; POSText[] result; StringBuffer s; LangText lt; lt = new LangText(LanguageType.en); // two POS in {{-en-}} in Russian Wiktionary s1 = "Before \n" + "== ага́ I ==\n" + "=== Морфологические и синтаксические свойства ===\n" + "{{part-ru|}}\n" + "\n"; s2 = "== ага́ II ==\n" + "===Морфологические и синтаксические свойства===\n" + "{{сущ ru f a 3b\n" + "|основа=аг\n" + "|слоги={{по-слогам|а|.|га́}}\n" + "|show-text=1\n" + "}}\n"; str = s1 + s2; lt.text = new StringBuffer(str); result = WPOSRu.splitToPOSSections("ага", lt); assertEquals(2, result.length); assertEquals(POS.particle, result[0].getPOSType()); assertEquals(POS.noun, result[1].getPOSType()); assertTrue(result[0].getText().toString().equalsIgnoreCase(s1)); assertTrue(result[1].getText().toString().equalsIgnoreCase(s2)); } }