/* TLangPOS.java - SQL operations with the table 'lang_pos' in Wiktionary * parsed database. * * Copyright (c) 2009-2011 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com> * Distributed under EPL/LGPL/GPL/AL/BSD multi-license. */ package wikokit.base.wikt.sql; import wikokit.base.wikt.constant.SoftRedirectType; import wikokit.base.wikipedia.language.Encodings; import wikokit.base.wikipedia.sql.PageTableBase; import wikokit.base.wikipedia.sql.Connect; import java.sql.*; import wikokit.base.wikt.constant.Relation; import java.util.Set; import java.util.HashSet; import java.util.Map; import java.util.List; import java.util.ArrayList; /** Operations with the table 'lang_pos' in MySQL Wiktionary parsed database. * * @see wikt.word.WPOS */ public class TLangPOS { /** Unique identifier in the table 'lang_pos'. */ private int id; /** Title of the wiki page, word. */ private TPage page; // int page_id; /** Language. */ private TLang lang; // int lang_id /** Part of speech. */ private TPOS pos; // int pos_id /** Etymology number (from 0 till max(ruwikt,now)=7). */ private int etymology_id; //private TEtymology etimology; // int etymology_id // see WPOSRu.splitToPOSSections in WPOSRuTest.java /** Type of soft redirect (to the page .lemma): * 0 - None - it's not a redirect, it is the usual Wiktionary entry * 1 - Wordform, soft redirect to lemma, e.g. worked -> work * e.g. worked: "Simple past tense and past participle of [[work]]." * 2 - Misspelling, soft redirect to correct spelling, * see template {{misspelling of|}} in enwikt * * @see TPage.is_redirect - a hard redirect. */ private SoftRedirectType redirect_type; /** A lemma of word. It's used when .redirect_type != None */ private String lemma; /** Meaning consists of Definitions + Quotations, Semantic relations and Translations. */ private TMeaning[] meaning; private final static TLangPOS[] NULL_TLANGPOS_ARRAY = new TLangPOS[0]; private final static TLang [] NULL_TLANG_ARRAY = new TLang[0]; private final static TMeaning[] NULL_TMEANING_ARRAY = new TMeaning[0]; public TLangPOS(int _id,TPage _page,TLang _lang,TPOS _pos,int _etymology_id,String _lemma) { id = _id; page = _page; lang = _lang; pos = _pos; etymology_id = _etymology_id; lemma = _lemma; meaning = NULL_TMEANING_ARRAY; } /** Gets unique ID from database */ public int getID() { return id; } /** Gets page from database */ public TPage getPage() { return page; } /** Gets language from database */ public TLang getLang() { return lang; } /** Gets part of speech from database. */ public TPOS getPOS() { return pos; } /** Gets meaning (already loaded from database, @see getRecursive). */ public TMeaning[] getMeaning() { return meaning; } /** Gets number of meanings (already loaded from database, @see getRecursive). */ public int countMeanings() { return meaning.length; } /** Gets number of types of semantic relations defined for the meanings, * e.g. only Synonymy means '1', Synonymy + Antonymy = 2, etc. * * Remark: relations should be already loaded, @see getRecursive. */ public int countRelationTypes() { Set<Relation> rel_types = new HashSet<Relation>(); for(TMeaning m : meaning) { Set<Relation> relation = m.getRelation().keySet(); rel_types.addAll(relation); } return rel_types.size(); } /** Increments the number of semantic relations per types for the meanings, * i.e. number of synonyms for all meanings of this words, * number of antonyms for all meanings of this word, etc. * * Remark: relations should be already loaded, @see getRecursive. */ public void addNumberOfRelationPerType(Map<Relation, Integer> m_result) { for(TMeaning m : meaning) { Map<Relation, TRelation[]> m_trel = m.getRelation(); for(Relation r : m_trel.keySet()) { int add = m_trel.get(r).length; if(m_result.containsKey(r)) { int old = m_result.get(r); m_result.put(r, old + add); } else { m_result.put(r, add); } } } } /** Inserts record into the table 'lang_pos'.<br><br> * INSERT INTO lang_pos (page_id,lang_id,pos_id,etymology_n,lemma) VALUES (1,2,3,4,"apple"); * @param TPage ID of title of wiki page which will be added * @param lang language of a word at a page * @param pos part of speech of a word * @param etymology_n enumeration for homographs * @param lemma e.g. "run" for the page_title="running" * @return null if data is absent */ public static TLangPOS insert (Connect connect,TPage page,TLang lang,TPOS pos, int etymology_n,String lemma) { if(null == page || null == lang || null == pos) { System.out.println("Error (wikt_parsed TLangPOS.insert()):: null arguments, page="+page+", lang="+lang+", pos="+pos); return null; } StringBuilder str_sql = new StringBuilder(); TLangPOS lang_pos = null; lang_pos = getUniqueByPagePOSLangEtymology (connect, page, lang, pos, etymology_n); if(null != lang_pos) { System.out.println("Error (TLangPOS.java insert()):: page_title="+page.getPageTitle()+ "; the language header is repeated twice or more!'"); return lang_pos; } try { Statement s = connect.conn.createStatement (); try { str_sql.append("INSERT INTO lang_pos (page_id,lang_id,pos_id,etymology_n,lemma) VALUES ("); str_sql.append(page.getID()); str_sql.append(","); str_sql.append(lang.getID()); str_sql.append(","); str_sql.append(pos.getID()); str_sql.append(","); str_sql.append(etymology_n); if(null != lemma && lemma.length() > 0) { str_sql.append(",\""); String safe_lemma = PageTableBase.convertToSafeStringEncodeToDB(connect, lemma); str_sql.append(safe_lemma); str_sql.append("\")"); } else str_sql.append(",\"\")"); s.executeUpdate (str_sql.toString()); } finally { s.close(); } try { s = connect.conn.createStatement (); ResultSet rs = s.executeQuery ("SELECT LAST_INSERT_ID() as id"); try { if (rs.next ()) lang_pos = new TLangPOS(rs.getInt("id"), page, lang, pos, etymology_n, lemma); } finally { rs.close(); } } finally { s.close(); } }catch(SQLException ex) { String page_title = page.getPageTitle(); System.out.println("SQLException (TLangPOS.insert()):: page_title="+page_title+ "; sql='" + str_sql.toString() + "' " + ex.getMessage()); } return lang_pos; } /** Selects rows from the table 'lang_pos' by the page_id.<br><br> * * SELECT id,lang_id,pos_id,etymology_n,lemma FROM lang_pos WHERE page_id=562 ORDER BY id; * * @return empty array if data is absent */ public static TLangPOS[] get (Connect connect,TPage page) { // Todo? // // (lang_id?, pos_id?) : add selection by: language and POS? : // * @param lang language of Wiktionary article, if lang==null then language are not used in order to filter data // * @param pos part of speech of Wiktionary article, if pos==null then POS are not used in order to filter data //public static TLangPOS[] get (Connect connect,TPage page,TLang lang,TPOS pos) { //String safe_title = PageTableBase.convertToSafeStringEncodeToDB(connect, page_title); if(null == page) { System.out.println("Error (wikt_parsed TLangPOS.get()):: null argument: page."); return null; } StringBuilder str_sql = new StringBuilder(); List<TLangPOS> list_lp = null; try { Statement s = connect.conn.createStatement (); try { str_sql.append("SELECT id,lang_id,pos_id,etymology_n,lemma FROM lang_pos WHERE page_id="); str_sql.append(page.getID()); str_sql.append(" ORDER BY id"); ResultSet rs = s.executeQuery (str_sql.toString()); try { while (rs.next ()) { int id = rs.getInt("id"); TLang lang = TLang.getTLangFast( rs.getInt("lang_id")); TPOS pos = TPOS. getTPOSFast ( rs.getInt("pos_id")); int etymology_n = rs.getInt("etymology_n"); String lemma = Encodings.bytesToUTF8(rs.getBytes("lemma")); if(null != lang && null != pos) { if(null == list_lp) list_lp = new ArrayList<TLangPOS>(); list_lp.add(new TLangPOS(id, page, lang, pos, etymology_n, lemma)); } } } finally { rs.close(); } } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (TLangPOS.get()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } if(null == list_lp) return NULL_TLANGPOS_ARRAY; return ((TLangPOS[])list_lp.toArray(NULL_TLANGPOS_ARRAY)); } /** Selects one (unique) rows from the table 'lang_pos' by the page ID, * POS ID, language ID, and etymology number. * * SELECT id,lemma FROM lang_pos WHERE page_id=1 AND pos_id=20 AND lang_id=390 AND etymology_n=2; * * @return null if data is absent */ public static TLangPOS getUniqueByPagePOSLangEtymology (Connect connect, TPage page,TLang lang,TPOS pos,int etymology_n) { if(null == page || null == lang || null == pos) { System.out.println("Error (TLangPOS.getUniqueByPagePOSLangEtymology()):: null arguments, page="+page+", lang="+lang+", pos="+pos); return null; } StringBuilder str_sql = new StringBuilder(); TLangPOS lang_pos = null; try { Statement s = connect.conn.createStatement (); try { str_sql.append("SELECT id,lemma FROM lang_pos WHERE page_id="); str_sql.append(page.getID()); // 3902 str_sql.append(" AND pos_id="); str_sql.append(pos.getID()); str_sql.append(" AND lang_id="); str_sql.append(lang.getID()); str_sql.append(" AND etymology_n="); str_sql.append(etymology_n); ResultSet rs = s.executeQuery (str_sql.toString()); try { if (rs.next ()) { int id = rs.getInt("id"); String lemma = Encodings.bytesToUTF8(rs.getBytes("lemma")); lang_pos = new TLangPOS(id, page, lang, pos, etymology_n, lemma); } } finally { rs.close(); } } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (TLangPOS.getUniqueByPagePOSLangEtymology()):: page_title="+page.getPageTitle()+ "; sql='" + str_sql.toString() + "' " + ex.getMessage()); } return lang_pos; } /** Selects rows from the table 'lang_pos' by the page_id, * fills (recursively) meanings, relations, translations. * * @return empty array if data is absent */ public static TLangPOS[] getRecursive (Connect connect,TPage page) { TLangPOS[] lang_pos_all = TLangPOS.get(connect, page); for(TLangPOS lang_pos : lang_pos_all) { lang_pos.meaning = TMeaning.getRecursive(connect, lang_pos); } return lang_pos_all; } /** Selects list of languages for the given page. * * SELECt lang_id FROM lang_pos WHERE page_id=674672 GROUP by lang_id; * * @return empty array if data is absent */ public static TLang[] getLanguages (Connect connect,TPage page) { if(null == page) { System.out.println("Error (TLangPOS.get()):: null argument: page."); return null; } StringBuilder str_sql = new StringBuilder(); List<TLang> list_lang = null; try { Statement s = connect.conn.createStatement (); try { // SELECt lang_id FROM lang_pos WHERE page_id=674672 GROUP by lang_id str_sql.append("SELECt lang_id FROM lang_pos WHERE page_id="); str_sql.append(page.getID()); str_sql.append(" GROUP by lang_id"); ResultSet rs = s.executeQuery (str_sql.toString()); try { while (rs.next ()) { TLang l = TLang.getTLangFast( rs.getInt("lang_id")); if(null != l) { if(null == list_lang) list_lang = new ArrayList<TLang>(); list_lang.add(l); } } } finally { rs.close(); } } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (TLangPOS.getLanguages()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } if(null == list_lang) return NULL_TLANG_ARRAY; return ((TLang[])list_lang.toArray(NULL_TLANG_ARRAY)); } /** Selects row from the table 'lang_pos' by ID.<br><br> * SELECT page_id,lang_id,pos_id,etymology_n,lemma FROM lang_pos WHERE id=8; * @return null if data is absent */ public static TLangPOS getByID (Connect connect,int id) { StringBuilder str_sql = new StringBuilder(); TLangPOS lang_pos = null; try { Statement s = connect.conn.createStatement (); try { str_sql.append("SELECT page_id,lang_id,pos_id,etymology_n,lemma FROM lang_pos WHERE id="); str_sql.append(id); ResultSet rs = s.executeQuery (str_sql.toString()); try { if (rs.next ()) { TPage page = TPage.getByID (connect, rs.getInt("page_id")); TLang lang = TLang.getTLangFast( rs.getInt("lang_id")); TPOS pos = TPOS. getTPOSFast ( rs.getInt("pos_id")); int etymology_n = rs.getInt("etymology_n"); String lemma = Encodings.bytesToUTF8( rs.getBytes("lemma")); if(null != lang && null != pos) { lang_pos = new TLangPOS(id, page, lang, pos, etymology_n, lemma); } } } finally { rs.close(); } } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (TLangPOS.getByID()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } return lang_pos; } /** Deletes all rows from the table 'lang_pos' by page_id.<br><br> * DELETE FROM lang_pos WHERE page_id=1; * @param id unique ID in the table `lang_pos` */ public static void delete (Connect connect,TPage page) { if(null == page) { System.out.println("Error (TLangPOS.delete()):: null argument page."); return; } StringBuilder str_sql = new StringBuilder(); try { Statement s = connect.conn.createStatement (); try { str_sql.append("DELETE FROM lang_pos WHERE page_id="); str_sql.append(page.getID()); s.execute (str_sql.toString()); } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (TLangPOS.delete()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } } /** Deletes all rows from the table 'lang_pos' by page_id.<br><br> * DELETE FROM lang_pos WHERE page_id=1; * @param id unique ID in the table `lang_pos` */ /*public static boolean intersectionIsNotEmpty (TPage page) { return false; }*/ }