/* TWikiTextWords.java - SQL operations with the table 'wiki_text' in Wiktionary * parsed database. * * Copyright (c) 2009-2011 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com> * Distributed under EPL/LGPL/GPL/AL/BSD multi-license. */ package wikokit.base.wikt.sql; import wikokit.base.wikt.util.WikiWord; import wikokit.base.wikipedia.sql.Connect; import java.sql.*; import java.util.List; import java.util.ArrayList; /** An operations with the table 'wiki_text_words' in MySQL wiktionary_parsed database. */ public class TWikiTextWords { /** Unique identifier in the table 'wiki_text_words'. */ private int id; /** Wiki text (without wikification). */ private TWikiText wiki_text; /** Link from wikified word to a title of wiki article (page), lemma. */ private TPage page; /** Link from wikified word to a title of wiki article (page) and an inflectional form. * It could be null, e.g. for "[[device]]". * It is not null for "[[run|running]]. */ private TPageInflection page_inflection; private final static TWikiText[] NULL_TWIKITEXT_ARRAY = new TWikiText[0]; private final static TWikiTextWords[] NULL_TWIKITEXTWORDS_ARRAY = new TWikiTextWords[0]; public TWikiTextWords(int _id,TWikiText _wiki_text,TPage _page,TPageInflection _page_inflection) { id = _id; wiki_text = _wiki_text; page = _page; page_inflection = _page_inflection; } /** Gets unique ID from database */ public int getID() { return id; } /** Gets wiki text from database */ public TWikiText getWikiText() { return wiki_text; } /** Gets page (article entry) from the table 'page'. */ public TPage getPage() { // wiki_text_word.page_id -> page.id if(null != page) return page; // wiki_text_word.page_inflection_id -> page_inflection.id -> page.id if(null != page_inflection) return page_inflection.getPage(); return null; } /** Clears (deletes) object fields. */ public void freeUp() { if(null != wiki_text) { // wiki_text.freeUp(); wiki_text = null; } if(null != page) { page = null; // page.freeUp(); } page_inflection = null; } /** If this word is absent in the table 'wiki_text_words' then * inserts records into tables: * 'wiki_text_words', * 'page_inflecton', * 'inflection', * 'page'. * * Tables 'page_inflecton' and 'inflection' will be filled * only if word_visible != word_link of wiki_word. * * @param wiki_text text with wiki words. * @return inserted record, or null if insertion failed */ public static void storeToDB (Connect connect,TWikiText twiki_text,WikiWord wiki_word) { if(null == wiki_word) return; String word_visible = wiki_word.getWordVisible(); String word_link = wiki_word.getWordLink(); if(word_link.length() == 0) return; // fill table 'page' int word_count = 0; int wiki_link_count = 0; boolean is_in_wiktionary = false; // don't know, may be (!todo the check) String redirect_target = null; TPage page = TPage.getOrInsert(connect, word_link, word_count, wiki_link_count, is_in_wiktionary, redirect_target); assert(page != null); TPageInflection page_infl; if(0 != word_link.compareTo(word_visible)) { int freq = 1; // fill also tables 'page_inflecton' and 'inflection' TInflection infl = TInflection.getOrInsert(connect, word_visible, freq); assert(null != infl); int term_freq = 1; page_infl = TPageInflection.getOrInsert(connect, page, infl, term_freq); assert(null != page_infl); infl = null; } else page_infl = null; // skip tables 'page_inflecton' and 'inflection' // fill table 'wiki_text_words' TWikiTextWords w = TWikiTextWords.getOrInsert(connect, twiki_text, page, page_infl); assert(w != null); w = null; page = null; page_infl = null; } /** Gets ID of a record or inserts record (if it is absent) * into the table 'wiki_text_words'. * * @return inserted record, or null if insertion failed */ public static TWikiTextWords getOrInsert (Connect connect,TWikiText wiki_text, TPage page,TPageInflection page_inflection) { TWikiTextWords w = TWikiTextWords.getByWikiTextAndPageAndInflection(connect, wiki_text, page, page_inflection); if(null == w) w = TWikiTextWords.insert(connect, wiki_text, page, page_inflection); return w; } /** Inserts record into the table 'wiki_text_words'.<br><br> * * INSERT INTO wiki_text_words (wiki_text_id,page_id,page_inflection_id) VALUES (1,1,1); * * @param wiki_text text (without wikification) * @param page link of wikified word * @param page_inflection wikified word from wiki_text, it could be null * @return inserted record, or null if insertion failed */ public static TWikiTextWords insert (Connect connect,TWikiText wiki_text, TPage page,TPageInflection page_inflection) { if(null == wiki_text || null == page) return null; StringBuilder str_sql = new StringBuilder(); TWikiTextWords words = null; try { Statement s = connect.conn.createStatement (); try { if(null != page_inflection) str_sql.append("INSERT INTO wiki_text_words (wiki_text_id,page_id,page_inflection_id) VALUES ("); else str_sql.append("INSERT INTO wiki_text_words (wiki_text_id,page_id) VALUES ("); str_sql.append(wiki_text.getID()); str_sql.append(","); str_sql.append(page.getID()); if(null != page_inflection) { str_sql.append(","); str_sql.append(page_inflection.getID()); } str_sql.append(")"); s.executeUpdate (str_sql.toString()); } finally { s.close(); } s = connect.conn.createStatement (); try { ResultSet rs = s.executeQuery ("SELECT LAST_INSERT_ID() as id"); try { if (rs.next ()) { words = new TWikiTextWords(rs.getInt("id"), wiki_text, page, page_inflection); //System.out.println("TWikiTextWords.insert()):: wiki_text='" + wiki_text.getText() + "'; id=" + rs.getInt("id") + "; page='" + page.getPageTitle() + "'"); } } finally { rs.close(); } } finally { s.close(); } }catch(SQLException ex) { System.out.println("SQLException (TWikiTextWords.insert()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } return words; } /** Selects records from 'wiki_text_words' table by an ID of wiki text.<br><br> * * SELECT id,page_id,page_inflection_id FROM wiki_text_words WHERE wiki_text_id=1; * * @param text text (without wikification). * @return empty array if there are no wikified words */ public static TWikiTextWords[] getByWikiText (Connect connect,TWikiText wiki_text) { if(null == wiki_text) return NULL_TWIKITEXTWORDS_ARRAY; StringBuilder str_sql = new StringBuilder(); List<TWikiTextWords> list_words = null; try { Statement s = connect.conn.createStatement (); try { str_sql.append("SELECT id,page_id,page_inflection_id FROM wiki_text_words WHERE wiki_text_id="); str_sql.append(wiki_text.getID()); ResultSet rs = s.executeQuery (str_sql.toString()); try { while (rs.next ()) { int pi = rs.getInt("page_inflection_id"); TPageInflection page_infl = 0 == pi ? null : TPageInflection.getByID(connect, pi); TPage page = TPage.getByID(connect, rs.getInt("page_id")); if(null != page) { if(null == list_words) list_words = new ArrayList<TWikiTextWords>(); list_words.add(new TWikiTextWords(rs.getInt("id"), wiki_text, page, page_infl)); } } } finally { rs.close(); } } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (TWikiTextWords.getByWikiText()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } if(null == list_words) return NULL_TWIKITEXTWORDS_ARRAY; return ((TWikiTextWords[])list_words.toArray(NULL_TWIKITEXTWORDS_ARRAY)); } /** Selects records from 'wiki_text_words' table by an ID of page.<br><br> * SELECT id,wiki_text_id,page_inflection_id FROM wiki_text_words WHERE page_id=1; * @param page wikified word which belong to some wiki text * @return empty array if there are no wiki texts with this word */ public static TWikiTextWords[] getByPage (Connect connect,TPage page) { if(null == page) { System.out.println("Error (TWikiTextWords.getByPage()):: null argument page."); return NULL_TWIKITEXTWORDS_ARRAY; } StringBuilder str_sql = new StringBuilder(); List<TWikiTextWords> list_words = null; try { Statement s = connect.conn.createStatement (); try { str_sql.append("SELECT id,wiki_text_id,page_inflection_id FROM wiki_text_words WHERE page_id="); str_sql.append(page.getID()); ResultSet rs = s.executeQuery (str_sql.toString()); try { while (rs.next ()) { int pi = rs.getInt("page_inflection_id"); TWikiText wiki_text = TWikiText.getByID(connect, rs.getInt("wiki_text_id")); TPageInflection page_infl = 0 == pi ? null : TPageInflection.getByID(connect, pi); if(null != wiki_text) { if(null == list_words) list_words = new ArrayList<TWikiTextWords>(); list_words.add(new TWikiTextWords(rs.getInt("id"), wiki_text, page, page_infl)); } } } finally { rs.close(); } } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (TWikiTextWords.getByPage()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } if(null == list_words) return NULL_TWIKITEXTWORDS_ARRAY; return ((TWikiTextWords[])list_words.toArray(NULL_TWIKITEXTWORDS_ARRAY)); } /** Selects one record from 'wiki_text_words' table by wiki text ID * and page ID and page_inflection.id .<br><br> * * SELECT id FROM wiki_text_words WHERE wiki_text_id=9 AND page_id=9 AND page_inflection_id=9; * SELECT id FROM wiki_text_words WHERE wiki_text_id=9 AND page_id=9 AND page_inflection_id IS NULL; * @param text text (without wikification). * @return null if text is absent */ public static TWikiTextWords getByWikiTextAndPageAndInflection (Connect connect, TWikiText wiki_text,TPage page,TPageInflection page_inflection) { if(null == wiki_text || null == page) return null; StringBuilder str_sql = new StringBuilder(); TWikiTextWords word = null; try { Statement s = connect.conn.createStatement (); try { str_sql.append("SELECT id FROM wiki_text_words WHERE wiki_text_id="); str_sql.append(wiki_text.getID()); str_sql.append(" AND page_id="); str_sql.append(page.getID()); if(null != page_inflection) { str_sql.append(" AND page_inflection_id="); str_sql.append(page_inflection.getID()); } else str_sql.append(" AND page_inflection_id IS NULL"); ResultSet rs = s.executeQuery (str_sql.toString()); try { if (rs.next ()) word = new TWikiTextWords(rs.getInt("id"), wiki_text, page, page_inflection); } finally { rs.close(); } } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (TWikiTextWords.getByWikiText()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } return word; } /** Selects row from the table 'wiki_text' by ID<br><br> * * SELECT wiki_text_id,page_id,page_inflection_id FROM wiki_text_words WHERE id=1; * * @return null if data is absent */ public static TWikiTextWords getByID (Connect connect,int id) { StringBuilder str_sql = new StringBuilder(); TWikiTextWords word = null; try { Statement s = connect.conn.createStatement (); try { str_sql.append("SELECT wiki_text_id,page_id,page_inflection_id FROM wiki_text_words WHERE id="); str_sql.append(id); ResultSet rs = s.executeQuery (str_sql.toString()); try { if(rs.next ()) { TWikiText wiki_text = TWikiText.getByID(connect, rs.getInt("wiki_text_id")); TPage page = TPage. getByID(connect, rs.getInt("page_id")); int pi = rs.getInt("page_inflection_id"); TPageInflection page_infl = 0 == pi ? null : TPageInflection.getByID(connect, pi); if(null != wiki_text && null != page) word = new TWikiTextWords(id, wiki_text, page, page_infl); } } finally { rs.close(); } } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (TWikiTextWords.getByID()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } return word; } /** Selects only first row (LIMIT 1) from the table 'wiki_text' by wiki_text_id<br><br> * * SELECT id,page_id,page_inflection_id FROM wiki_text_words WHERE wiki_text_id=1 LIMIT 1; * * @return null if data is absent */ public static TWikiTextWords getOneByWikiText (Connect connect,TWikiText wiki_text) { if(null == wiki_text) { System.out.println("Error (TWikiTextWords.getOneByWikiText()):: null argument wiki_text."); return null; } StringBuilder str_sql = new StringBuilder(); TWikiTextWords word = null; try { Statement s = connect.conn.createStatement (); try { str_sql.append("SELECT id,page_id,page_inflection_id FROM wiki_text_words WHERE wiki_text_id="); str_sql.append(wiki_text.getID()); str_sql.append(" LIMIT 1"); ResultSet rs = s.executeQuery (str_sql.toString()); try { if(rs.next ()) { int id = rs.getInt("id"); TPage page = TPage.getByID(connect, rs.getInt("page_id")); int pi = rs.getInt("page_inflection_id"); TPageInflection page_infl = 0 == pi ? null : TPageInflection.getByID(connect, pi); if(null != wiki_text && null != page) word = new TWikiTextWords(id, wiki_text, page, page_infl); } } finally { rs.close(); } } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (TWikiTextWords.getByID()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } return word; } /** Selects records from 'page' table (via 'wiki_text_words' and * 'page_inflection' tables) by wiki text with one condition: * only one word should be wikified in this wiki text.<br><br> * * This could be used in order to find translations for this article.<br><br> * * E.g. to find articles which contain the translation [[little]]. * If there are three translations on this page:<br> * (1) == translation == [[little]] [[bell]]<br> * (2) == translation == [[little bell]]<br> * (3) == translation == [[little]]<br> * then translation (1) is not suitable (two wiki words), but (2) and (3) are OK. * * @param wikified text. * @return null if text is absent */ public static TPage getPageForOneWordWikiText (Connect connect,TWikiText wiki_text) { if(null == wiki_text) { System.out.println("Error (TWikiTextWords.getPageForOneWordWikiText()):: null argument wiki_text."); return null; } TWikiTextWords[] words = TWikiTextWords.getByWikiText(connect, wiki_text); if(1 == words.length) return words[0].getPage(); return null; } /** Gets wiki texts which have only one wikified word. * * @param page a wikified word in the sought wiki phrases * @return empty array if such texts are absent */ public static TWikiText[] getOneWordWikiTextByPage (Connect connect,TPage page) { if(null == page) { System.out.println("Error (TWikiTextWords.getOneWordWikiTextByPage()):: null argument page."); return NULL_TWIKITEXT_ARRAY; } // 1. 'page_inflection' // todo ... // 2. 'wiki_text_words' List<TWikiText> list_texts = null; TWikiTextWords[] words = TWikiTextWords.getByPage(connect, page); for(TWikiTextWords w : words) { TWikiText wiki_text = w.getWikiText(); if(null != wiki_text) { TWikiTextWords[] ww = TWikiTextWords.getByWikiText(connect, wiki_text); if(1 == ww.length) { if(null == list_texts) list_texts = new ArrayList<TWikiText>(); list_texts.add(wiki_text); } } } if(null == list_texts) return NULL_TWIKITEXT_ARRAY; return ((TWikiText[])list_texts.toArray(NULL_TWIKITEXT_ARRAY)); } /** Deletes row from the table 'wiki_text_words' by a value of ID. * DELETE FROM wiki_text_words WHERE id=1; * @param id unique ID in the table `wiki_text_words` */ public static void delete (Connect connect,TWikiTextWords word) { if(null == word) { System.out.println("Error (wikt_parsed TWikiTextWords.delete()):: null argument word."); return; } StringBuilder str_sql = new StringBuilder(); try { Statement s = connect.conn.createStatement (); try { str_sql.append("DELETE FROM wiki_text_words WHERE id="); str_sql.append(word.getID()); s.execute (str_sql.toString()); } finally { s.close(); } //System.out.println("TWikiTextWords.delete()):: wiki_text='" + word.getWikiText().getText() + "'; id=" + word.getID()); } catch(SQLException ex) { System.out.println("SQLException (wikt_parsed TWikiTextWords.java delete()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } } }