package wikokit.base.wikt.sql; import wikokit.base.wikt.sql.TPOS; import wikokit.base.wikt.sql.TLangPOS; import wikokit.base.wikt.sql.TTranslation; import wikokit.base.wikt.sql.TPage; import wikokit.base.wikt.sql.TLang; import wikokit.base.wikt.sql.TMeaning; import wikokit.base.wikt.sql.TWikiText; import wikokit.base.wikipedia.language.LanguageType; import wikokit.base.wikt.constant.POS; import wikokit.base.wikt.util.POSText; import wikokit.base.wikt.multi.ru.WTranslationRu; import wikokit.base.wikt.word.WTranslation; //import wikt.word.WTranslationEntry; //import wikipedia.sql.UtilSQL; import wikokit.base.wikt.sql.index.IndexForeign; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.*; import wikokit.base.wikipedia.sql.Connect; public class TTranslationTest { public Connect ruwikt_parsed_conn; String samolyot_text, kolokolchik_text, kolokolchik_text_1_translation_box, kosa_text_1_translation_box_without_header; LanguageType native_lang; TPage page; String page_title; TPOS pos; TLangPOS lang_pos; TMeaning meaning; String meaning_summary; WTranslation[] wtrans_all; public TTranslationTest() { } @BeforeClass public static void setUpClass() throws Exception { } @AfterClass public static void tearDownClass() throws Exception { } @Before public void setUp() { ruwikt_parsed_conn = new Connect(); ruwikt_parsed_conn.Open(Connect.RUWIKT_HOST,Connect.RUWIKT_PARSED_DB,Connect.RUWIKT_USER,Connect.RUWIKT_PASS, LanguageType.ru); native_lang = LanguageType.ru; TLang.recreateTable(ruwikt_parsed_conn); // once upon a time: create Wiktionary parsed db TLang.createFastMaps(ruwikt_parsed_conn); // once upon a time: use Wiktionary parsed db TPOS.recreateTable(ruwikt_parsed_conn); // once upon a time: create Wiktionary parsed db TPOS.createFastMaps(ruwikt_parsed_conn); // once upon a time: use Wiktionary parsed db kolokolchik_text = "text before \n" + "===Перевод===\n" + "{{перев-блок|звонок_test|\n" + "|en=[[little]] [[bell_test]], [[handbell_test]], [[doorbell_test]]\n" + "|de=[[Glöckchen_test]], [[Schelle_test]], [[Klingel_test]]\n" + "|os=[[мыр-мыраг_test]], [[хъуытаз_test]] {{m}}\n" + "|fr=[[sonnette_test]], [[clochette_test]], [[clarine_test]]; (у скота) [[sonnaille_test]]\n" + "}}\n" + "{{перев-блок|оркестровый инструмент_test|\n" + "|en=[[glockenspiel_test]]\n" + "}}\n" + "\n" + "{{перев-блок|цветок_test\n" + "|en=[[bluebell_test]], [[bellflower_test]], [[campanula_test]]\n" + "|os=[[дзæнгæрæг_test]], [[къæрцгæнæг_test]]\n" + "|fr=[[campanule_test]], [[clochette_test]]\n" + "}}\n" + "\n" + "===Библиография===\n" + "*\n" + "\n{{categ|category1|category2|lang=}}" + "\n" + "[[Категория:Музыкальные инструменты]]\n"; kolokolchik_text_1_translation_box = "{{перев-блок|цветок\n" + "|en=[[bluebell_test]], [[bellflower_test]], [[campanula_test]]\n" + "|os=[[дзæнгæрæг_test]], [[къæрцгæнæг_test]]\n" + "|fr=[[campanule_test]], [[clochette_test]]\n" + "}}\n"; /*kosa_text_1_translation_box_without_header = "{{перев-блок\n" + "|en=[[braid_test]], [[plait_test]], [[pigtail_test]], [[queue_test]]\n" + "|de=[[Zopf_test]] {{m}} -es, Zöpfe\n" + "|fr=[[natte_test]] {{f}}; [[couette_test]] {{f}}, [[tresse_test]] <i>f</i>\n" + "}}\n";*/ Connect conn = ruwikt_parsed_conn; LanguageType wikt_lang = LanguageType.ru; // Russian Wiktionary page_title = "колокольчик_test"; // page_title = conn.enc.EncodeFromJava("test_TTranslation"); LanguageType lang_section = LanguageType.ru; // Russian word POSText pt = new POSText(POS.noun, kolokolchik_text); wtrans_all = WTranslationRu.parse(wikt_lang, lang_section, page_title, pt); // insert page, get meaning_id int word_count = 7; int wiki_link_count = 13; boolean is_in_wiktionary = true; String redirect_target = null; page = null; page = TPage.get(conn, page_title); if(null == page) { TPage.insert(conn, page_title, word_count, wiki_link_count, is_in_wiktionary, redirect_target); page = TPage.get(conn, page_title); } int lang_id = TLang.getIDFast(lang_section); TLang lang = TLang.getTLangFast(lang_id); int etymology_n = 0; String lemma = ""; pos = TPOS.get(POS.noun); TLangPOS.insert(conn, page, lang, pos, etymology_n, lemma); // let's found ID: TLangPOS[] array_lang_pos = TLangPOS.get(conn, page); assertTrue(null != array_lang_pos); assertEquals(1, array_lang_pos.length); int id = array_lang_pos[0].getID(); // test lang_pos = TLangPOS.getByID(conn, id); assertTrue(null != lang_pos); assertEquals(page.getID(), lang_pos.getPage().getID()); int meaning_n = 1; TWikiText wiki_text = null; meaning = TMeaning.insert(conn, lang_pos, meaning_n, wiki_text); assertNotNull(meaning); meaning_summary = "meaning_summary__test_TTranslation"; } @After public void tearDown() { Connect conn = ruwikt_parsed_conn; String[] pages_test = { "handbell_test", "doorbell_test", "glockenspiel_test", "bluebell_test", "bellflower_test", "campanula_test", // en "Glöckchen_test", "Schelle_test", "Klingel_test", // de "мыр-мыраг_test", "дзæнгæрæг_test", "къæрцгæнæг_test", // os "sonnette_test", "clarine_test", "campanule_test", "clochette_test", // fr // phrases (more than one wiki word) "little bell_test", "bell_test", // // [[little]] [[bell_test]], "(у скота) sonnaille_test", "sonnaille_test", // (у скота) [[sonnaille_test]] "хъуытаз_test {{m}}", "хъуытаз_test", // [[хъуытаз_test]] {{m}} // translation header "звонок_test", // {{перев-блок|звонок_test "оркестровый инструмент_test", // {{перев-блок|оркестровый инструмент_test "цветок_test", // {{перев-блок|цветок_test }; // delete temporary DB record // index_de IndexForeign.delete(conn, "Glöckchen_test", "колокольчик_test", native_lang, LanguageType.de); IndexForeign.delete(conn, "Schelle_test", "колокольчик_test", native_lang, LanguageType.de); IndexForeign.delete(conn, "Klingel_test", "колокольчик_test", native_lang, LanguageType.de); // index_en IndexForeign.delete(conn, "little bell_test", "колокольчик_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "handbell_test", "колокольчик_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "doorbell_test", "колокольчик_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "glockenspiel_test", "колокольчик_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "bluebell_test", "колокольчик_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "bellflower_test", "колокольчик_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "campanula_test", "колокольчик_test", native_lang, LanguageType.en); // index_fr IndexForeign.delete(conn, "sonnette_test", "колокольчик_test", native_lang, LanguageType.fr); IndexForeign.delete(conn, "clochette_test", "колокольчик_test", native_lang, LanguageType.fr); IndexForeign.delete(conn, "clarine_test", "колокольчик_test", native_lang, LanguageType.fr); IndexForeign.delete(conn, "(у скота) sonnaille_test", "колокольчик_test", native_lang, LanguageType.fr); IndexForeign.delete(conn, "campanule_test", "колокольчик_test", native_lang, LanguageType.fr); // index_os IndexForeign.delete(conn, "мыр-мыраг_test", "колокольчик_test", native_lang, LanguageType.os); IndexForeign.delete(conn, "хъуытаз_test {{m}}", "колокольчик_test", native_lang, LanguageType.os); IndexForeign.delete(conn, "дзæнгæрæг_test", "колокольчик_test", native_lang, LanguageType.os); IndexForeign.delete(conn, "къæрцгæнæг_test", "колокольчик_test", native_lang, LanguageType.os); page_title = "колокольчик_test"; page = TPage.get(conn, page_title); TLangPOS[] ar_lang_pos = TLangPOS.get(conn, page); for(TLangPOS lp : ar_lang_pos) { TTranslation[] tt = TTranslation.getByLangPOS(conn, lp); for(TTranslation t : tt) TTranslation.deleteWithEntries(conn, t); } for(String p: pages_test) { TWikiText wiki_text = TWikiText.get(conn, p); // 1. get WikiText by pages_test if(null != wiki_text) TWikiText.deleteWithWords(conn, wiki_text); TPage.delete(conn, p); } TLangPOS.delete(conn, page); TPage.delete(conn, page_title); TMeaning.delete(conn, meaning); //UtilSQL.deleteAllRecordsResetAutoIncrement(conn, "page"); //UtilSQL.deleteAllRecordsResetAutoIncrement(conn, "relation"); //UtilSQL.deleteAllRecordsResetAutoIncrement(conn, "lang_pos"); //UtilSQL.deleteAllRecordsResetAutoIncrement(conn, "meaning"); //UtilSQL.deleteAllRecordsResetAutoIncrement(conn, "wiki_text"); //UtilSQL.deleteAllRecordsResetAutoIncrement(conn, "wiki_text_words"); //UtilSQL.deleteAllRecordsResetAutoIncrement(conn, "translation"); //UtilSQL.deleteAllRecordsResetAutoIncrement(conn, "translation_entry"); //conn.Close(); } @Test public void testStoreToDB () { System.out.println("storeToDB"); Connect conn = ruwikt_parsed_conn; for(WTranslation wtrans : wtrans_all) { TTranslation.storeToDB( conn, native_lang, page_title, lang_pos, meaning, wtrans); } // gets translation from Russian into English (in Russian Wiktionary): // gets wikified words from text in the section == Translation == // page -> lang_pos -> meaning // page -> lang_pos -> translation // language -> translation // // "{{перев-блок|звонок|\n" + // "|en=[[little]] [[bell]], [[handbell]], [[doorbell]]\n" + // "{{перев-блок|оркестровый инструмент|\n" + // "|en=[[glockenspiel]]\n" + // "{{перев-блок|цветок\n" + // "|en=[[bluebell]], [[bellflower]], [[campanula]]\n" + TLang source_lang = TLang.get(LanguageType.ru); TLang target_lang = TLang.get(LanguageType.en); TPage[] en_translations = TTranslation.fromPageToTranslations(conn, source_lang, page, target_lang); // page = "колокольчик" assertNotNull(en_translations); assertEquals(6, en_translations.length); // 6: handbell, doorbell, glockenspiel, bluebell, bellflower, campanula // except 1 wiki phrase which consists of two wiki words: "[[little]] [[bell]]" // gets translation from English into Russian (in Russian Wiktionary): // page -> wiki_text_words -> wiki_text -> ? meaning -> lang_pos -> page // -> ? translation -> lang_pos -> page // звонок // fr=[[sonnette]] TPage fr_page = TPage.get(conn, "sonnette_test"); assertNotNull(fr_page); // there is no English translation for French word "sonnette" target_lang = TLang.get(LanguageType.en); TPage[] ru_source = TTranslation.fromTranslationsToPage(conn, source_lang, fr_page, target_lang); assertNotNull(ru_source); assertEquals(0, ru_source.length); // there is 1 French translation for French word "sonnette" target_lang = TLang.get(LanguageType.fr); ru_source = TTranslation.fromTranslationsToPage(conn, source_lang, fr_page, target_lang); assertNotNull(ru_source); assertEquals(1, ru_source.length); assertEquals(page_title, ru_source[0].getPageTitle()); } @Test public void testFromTranslationsToPage_strings () { System.out.println("fromTranslationsToPage_strings"); Connect conn = ruwikt_parsed_conn; for(WTranslation wtrans : wtrans_all) { TTranslation.storeToDB( conn, native_lang, page_title, lang_pos, meaning, wtrans); } // "{{перев-блок|звонок|\n" + // "|en=[[little]] [[bell]], [[handbell]], [[doorbell_test]]\n" + // "{{перев-блок|оркестровый инструмент|\n" + // "|en=[[glockenspiel]]\n" + // "{{перев-блок|цветок\n" + // "|en=[[bluebell]], [[bellflower]], [[campanula]]\n" + LanguageType source_lang = LanguageType.ru; LanguageType target_lang = LanguageType.en; // there is 1 translation Russian ("колокольчик") -> English ("doorbell_test") String en_translation = "doorbell_test"; String[] ru_source = TTranslation.fromTranslationsToPage(conn, source_lang, en_translation, target_lang); assertNotNull(ru_source); assertEquals(1, ru_source.length); assertEquals(page_title, ru_source[0]); } @Test public void testFromTranslationsToPage_UpperCaseConflict () { System.out.println("fromTranslationsToPage_strings"); Connect conn = ruwikt_parsed_conn; String redirect_target = null; // 1. let's check conflict: "plane" and "Plane" { String de_page_title = "Plane_test"; TPage de_page = TPage.get(conn, de_page_title); assertNull(de_page); de_page = TPage.insert(conn, de_page_title, 0, 0, false, redirect_target); assertNotNull(de_page); TPage.delete(conn, de_page_title); } // 2. let's check more than one translations: "самолёт" -> "plane" and "план" -> "plane" TLang lang = TLang.get(LanguageType.ru); int etymology_n = 0; String lemma = ""; LanguageType wikt_lang = LanguageType.ru; // Russian Wiktionary LanguageType lang_section = LanguageType.ru; // Russian word // block I { page_title = "самолёт_test"; page = TPage.insert(conn, page_title, 0, 0, false, redirect_target); assertNotNull(page); lang_pos = TLangPOS.insert(conn, page, lang, pos, etymology_n, lemma); assertNotNull(lang_pos); String samolyot_text = "text before \n" + "===Перевод===\n" + "{{перев-блок||\n" + "|en=[[airplane_test]], [[plane_test]], [[aircraft_test]]\n" + "|bg=[[самолет_test]], [[аероплан_test]]\n" + "}}\n"; POSText pt = new POSText(POS.noun, samolyot_text); wtrans_all = WTranslationRu.parse(wikt_lang, lang_section, page_title, pt); for(WTranslation wtrans : wtrans_all) { TTranslation.storeToDB( conn, native_lang, page_title, lang_pos, meaning, wtrans); } } // block II { page_title = "план_test"; page = TPage.insert(conn, page_title, 0, 0, false, redirect_target); assertNotNull(page); lang_pos = TLangPOS.insert(conn, page, lang, pos, etymology_n, lemma); assertNotNull(lang_pos); String plan_text = "text before \n" + "===Перевод===\n" + "{{перев-блок|схема, чертёж_test|\n" + "|en=[[map_test]], [[plane2_test]], [[scheme_test]]\n" + "}}\n" + "\n" + "{{перев-блок|программа_test|\n" + "|en=[[plan_test]], [[draft_test]], [[scheme_test]], [[contrivance_test]], [[road map_test]]\n" + "}}\n"; POSText pt = new POSText(POS.noun, plan_text); wtrans_all = WTranslationRu.parse(wikt_lang, lang_section, page_title, pt); for(WTranslation wtrans : wtrans_all) { TTranslation.storeToDB( conn, native_lang, page_title, lang_pos, meaning, wtrans); } } /* // "|en=[[airplane]], [[plane]], [[aircraft]]\n" + // самолёт // "|en=[[map]], [[plane]], [[scheme]]\n" + // план LanguageType source_lang = LanguageType.ru; LanguageType target_lang = LanguageType.en; // there are 2 translations: Russian ("самолёт", "план") -> English ("plane") String en_translation = "plane_test"; String[] ru_source = TTranslation.fromTranslationsToPage(conn, source_lang, en_translation, target_lang); assertNotNull(ru_source); assertEquals(2, ru_source.length); assertTrue( (ru_source[0].equals("самолёт_test") || ru_source[1].equals("самолёт_test")) && (ru_source[0].equals("план_test") || ru_source[1].equals("план_test")) ); */ // INSERT INTO "translation_entry" VALUES (869,42,262,1382); // delete temporary DB record of block I { page_title = "самолёт_test"; page = TPage.get(conn, page_title); IndexForeign.delete(conn, "airplane_test", "самолёт_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "plane_test", "самолёт_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "aircraft_test", "самолёт_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "самолет_test", "самолёт_test", native_lang, LanguageType.bg); IndexForeign.delete(conn, "аероплан_test", "самолёт_test", native_lang, LanguageType.bg); TLangPOS[] ar_lang_pos = TLangPOS.get(conn, page); for(TLangPOS lp : ar_lang_pos) { TTranslation[] tt = TTranslation.getByLangPOS(conn, lp); for(TTranslation t : tt) TTranslation.deleteWithEntries(conn, t); } String[] pages_test = { "самолёт_test", // ru "аероплан_test", "самолет_test", // bg "airplane_test", "plane_test", "aircraft_test", // en }; for(String p: pages_test) { TWikiText wiki_text = TWikiText.get(conn, p); // 1. get WikiText by pages_test if(null != wiki_text) TWikiText.deleteWithWords(conn, wiki_text); TPage.delete(conn, p); } TLangPOS.delete(conn, page); TPage.delete(conn, page_title); } // delete temporary DB record of block II { page_title = "план_test"; page = TPage.get(conn, page_title); IndexForeign.delete(conn, "map_test", "план_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "plane2_test", "план_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "scheme_test", "план_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "plan_test", "план_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "draft_test", "план_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "contrivance_test", "план_test", native_lang, LanguageType.en); IndexForeign.delete(conn, "road map_test", "план_test", native_lang, LanguageType.en); TLangPOS[] ar_lang_pos = TLangPOS.get(conn, page); for(TLangPOS lp : ar_lang_pos) { TTranslation[] tt = TTranslation.getByLangPOS(conn, lp); for(TTranslation t : tt) TTranslation.deleteWithEntries(conn, t); } String[] pages_test = { "scheme_test", "plan_test", "plane2_test", "draft_test", "contrivance_test", "map_test", "road map_test", "схема, чертёж_test", "программа_test" }; // translation headers for(String p: pages_test) { TWikiText wiki_text = TWikiText.get(conn, p); // 1. get WikiText by pages_test if(null != wiki_text) TWikiText.deleteWithWords(conn, wiki_text); TPage.delete(conn, p); } TLangPOS.delete(conn, page); TPage.delete(conn, page_title); } } @Test public void testInsert() { System.out.println("insert"); Connect conn = ruwikt_parsed_conn; TTranslation trans = TTranslation.insert(conn, lang_pos, meaning_summary, meaning); assertNotNull(trans); TLangPOS _lang_pos = trans.getLangPOS(); assertNotNull(_lang_pos); TPage _tpage = _lang_pos.getPage(); assertNotNull(_tpage); assertEquals(page_title, _tpage.getPageTitle()); assertEquals(meaning_summary, trans.getMeaningSummary()); TMeaning _meaning = trans.getMeaning(); assertNotNull(_meaning); TTranslation.delete(conn, trans); } @Test public void testInsertWithMeaningNULL() { System.out.println("insert__meaning_null"); Connect conn = ruwikt_parsed_conn; TMeaning meaning_local = null; TTranslation trans = TTranslation.insert(conn, lang_pos, meaning_summary, meaning_local); assertNotNull(trans); TLangPOS _lang_pos = trans.getLangPOS(); assertNotNull(_lang_pos); TPage _tpage = _lang_pos.getPage(); assertNotNull(_tpage); assertEquals(page_title, _tpage.getPageTitle()); assertEquals(meaning_summary, trans.getMeaningSummary()); assertNull(trans.getMeaning()); TTranslation.delete(conn, trans); } @Test public void testGetByID() { System.out.println("getByID"); Connect conn = ruwikt_parsed_conn; TTranslation trans = TTranslation.insert(conn, lang_pos, meaning_summary, meaning); assertNotNull(trans); TTranslation trans2 = TTranslation.getByID(conn, trans.getID()); assertEquals(trans.getMeaningSummary(), trans2.getMeaningSummary()); TTranslation.delete(conn, trans); } @Test public void testGetByID_WithMeaningNULL() { System.out.println("getByID_WithMeaningNULL"); Connect conn = ruwikt_parsed_conn; TMeaning meaning_local = null; TTranslation trans = TTranslation.insert(conn, lang_pos, meaning_summary, meaning_local); assertNotNull(trans); TTranslation trans2 = TTranslation.getByID(conn, trans.getID()); assertNull(trans2.getMeaning()); TTranslation.delete(conn, trans); } @Test public void testGetByLangPOS () { System.out.println("getByLangPOS"); Connect conn = ruwikt_parsed_conn; TTranslation trans = TTranslation.insert(conn, lang_pos, meaning_summary, meaning); assertNotNull(trans); TTranslation[] trans_all = TTranslation.getByLangPOS (conn, lang_pos); assertNotNull(trans_all); assertEquals(1, trans_all.length); TTranslation.delete(conn, trans); } @Test public void testGetByMeaning () { System.out.println("getByMeaning"); Connect conn = ruwikt_parsed_conn; TTranslation trans = TTranslation.insert(conn, lang_pos, meaning_summary, meaning); assertNotNull(trans); TTranslation ttrans = TTranslation.getByMeaning(conn, meaning); assertNotNull(ttrans); assertTrue(ttrans.getID() > 0); TTranslation.delete(conn, trans); } }