/* Keeper.java - manager stores parsed data to MRD Wiktionary database (wikt_parsed). * * Copyright (c) 2008-2011 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com> * Distributed under GNU General Public License. */ package wikokit.base.wikt.mrd; import wikokit.base.wikt.word.WPOS; import wikokit.base.wikt.word.WordBase; import wikokit.base.wikt.word.WLanguage; import wikokit.base.wikt.word.WTranslation; import wikokit.base.wikt.word.WRelation; import wikokit.base.wikt.word.WMeaning; import wikokit.base.wikt.constant.Relation; import wikokit.base.wikt.sql.index.IndexForeign; import wikokit.base.wikt.sql.index.IndexNative; import wikokit.base.wikt.sql.TPOS; import wikokit.base.wikt.sql.TLangPOS; import wikokit.base.wikt.sql.TTranslation; import wikokit.base.wikt.sql.TWikiText; import wikokit.base.wikt.sql.TPage; import wikokit.base.wikt.sql.TRelation; import wikokit.base.wikt.sql.TMeaning; import wikokit.base.wikt.sql.TLang; import wikokit.base.wikipedia.sql.Connect; import wikokit.base.wikt.util.WikiText; import wikokit.base.wikipedia.language.LanguageType; import java.util.Map; import wikokit.base.wikt.sql.label.TLabel; import wikokit.base.wikt.sql.quote.TQuote; /** Manager stores parsed data to MRD Wiktionary database (wikt_parsed). */ public class Keeper { // private static boolean DEBUG = true; /** Stores word data to tables of parsed wiktionary database * * @param conn connection interface to a parsed wiktionary database * @param word data to be stored to a parsed wiktionary database * @param native_lang native language in the Wiktionary, * e.g. Russian language in Russian Wiktionary */ public static void storeToDB(Connect conn, WordBase word, LanguageType native_lang) { String page_title = word.getPageTitle(); // table 'page', stores page title, gets id of new page int word_count = 0; // to calculate, todo ... int wiki_link_count = 0; // to calculate, todo ... boolean is_in_wiktionary = true; TPage tpage = TPage.getOrInsert(conn, page_title, word_count, wiki_link_count, is_in_wiktionary, word.getRedirect()); if(null == tpage) { System.out.println("(Keeper.storeToDB()):: TPage.getOrInsert returned null. page_title='" + page_title + "'"); } if(word.isRedirect()) return; boolean b_page_added_to_index_native = false; WLanguage[] w_languages = word.getAllLanguages(); for(WLanguage w_lang : w_languages) { LanguageType lang_type = w_lang.getLanguage(); TLang tlang = TLang.get(lang_type); boolean b_native_lang = lang_type == native_lang; // word in native language WPOS[] w_pos_all = w_lang.getAllPOS(); int etymology_n = 0; for(WPOS w_pos : w_pos_all) { TPOS tpos = TPOS.get(w_pos.getPOS()); // tpage, tlang, tpos, etymology_n: -> into table 'lang_pos', gets id String lemma = ""; // todo ... TLangPOS lang_pos = TLangPOS.insert(conn, tpage, tlang, tpos, etymology_n, lemma); etymology_n ++; Map<Relation, WRelation[]> m_relations = w_pos.getAllRelations(); WTranslation[] translations = w_pos.getAllTranslation(); WMeaning[] w_meaning_all = w_pos.getAllMeanings(); for(int i=0; i<w_meaning_all.length; i++) { WMeaning w_meaning = w_meaning_all[i]; WikiText definition = w_meaning.getWikiText(); TWikiText twiki_text= TWikiText.storeToDB(conn, definition); TMeaning tmeaning = TMeaning.insert(conn, lang_pos, i, twiki_text); TQuote.storeToDB(conn, page_title, tmeaning, tlang, w_meaning.getQuotes()); TRelation.storeToDB(conn, tmeaning, i, m_relations); TLabel.storeToDB(conn, page_title, tmeaning, tlang, w_meaning.getLabels()); if(translations.length > i) // not every meaning is happy to have it's own translation TTranslation.storeToDB(conn, native_lang, page_title, lang_pos, tmeaning, translations[i]); twiki_text = null; // free memory tmeaning = null; } // some stubs don't have definition, but they have translations if(w_meaning_all.length == 0 && translations.length > 0) { for(int i=0; i<translations.length; i++) { TMeaning tmeaning = TMeaning.insert(conn, lang_pos, i, null); TTranslation.storeToDB(conn, native_lang, page_title, lang_pos, tmeaning, translations[i]); tmeaning = null; // free memory } } // index of words if(w_meaning_all.length > 0) { if(b_native_lang) { // index of words in native language if(!b_page_added_to_index_native) { b_page_added_to_index_native = true; IndexNative.insert(conn, tpage, !m_relations.isEmpty()); } } else IndexForeign.insertIfAbsent(conn, page_title, true, null, native_lang, lang_type); } tpos = null; // free memory lang_pos = null; translations = null; } tlang = null; w_lang = null; } tpage = null; // free memory w_languages = null; // 4. table 'relation', stores relation_id, meaning_id, wiki_text_id, // may be: page_id (for simple one-word relation, for relations which are presented in the db) // ? post-processing? // 5. table 'translation', stores: translation_id, meaning_summary // 6. table 'translation_entry', stores: translation_id, lang_id, wiki_text_id, // may be: page_id (for simple one-word translation, for translations which are presented in the db) // ? post-processing? } }