/* IndexForeign.java - SQL operations with the tables 'index_XX' in Wiktionary * parsed database, where XX is a language code. * * Copyright (c) 2009-2012 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com> * Distributed under EPL/LGPL/GPL/AL/BSD multi-license. */ package wikokit.base.wikt.sql.index; import wikokit.base.wikt.sql.TPage; import wikokit.base.wikipedia.sql.Connect; import wikokit.base.wikt.sql.TLangPOS; import wikokit.base.wikipedia.language.LanguageType; import java.util.Map; import java.util.List; import java.util.ArrayList; import android.database.Cursor; import android.database.sqlite.SQLiteDatabase; /** The table 'index_XX' - wordlist of words in language with code XX * (table in Wiktionary parsed database). */ public class IndexForeign { /** Unique identifier in the table 'index_XX'. */ //private int id; /** 'foreign_page' is not NULL if index_XX.foreign_has_definition==1, i.e. * there is the the Wiktionary article with the title 'foreign_word' and * this article has non-empty definition section. * * If 'foreign_page' is NULL, then use string 'foreign_word'. */ private TPage foreign_page; /** Word in foreign language XX. */ private String foreign_word; /** Corresponded page.page_title of the Wiktionary article in native language. * It could be NULL. */ private TPage native_page; //private String native_page_title; private final static IndexForeign[] NULL_INDEXFOREIGN_ARRAY = new IndexForeign[0]; public IndexForeign(TPage _foreign_page,String _foreign_word, TPage _native_page) { foreign_page = _foreign_page; foreign_word = _foreign_word; native_page = _native_page; } /** Gets page in native language. */ public TPage getNativePage() { return native_page; } /** Gets page in foreign language. */ public TPage getForeignPage() { return foreign_page; } /** Gets the title of page in foreign language. */ public String getForeignWord() { return foreign_word; } /** Gets concatenation of foreign word, delimiter, and a word in native language. */ public String getConcatForeignAndNativeWords(String delimiter) { if(null == native_page) return foreign_word; else return foreign_word + delimiter + native_page.getPageTitle(); } /** Inserts record into the table 'index_XX'.<br><br> * INSERT INTO index_en (foreign_word,foreign_has_definition,native_page_title) VALUES ("water13",0,"вода13"); * * @param foreign_word word in foreign language XX * @param foreign_has_definition true, if there is any definition in the Wiktionary article with the title foreign_word * @param native_page_title the corresponded page.page_title of the * Wiktionary article in native language (it could be null) * @param native_lang native language in the Wiktionary, * e.g. Russian language in Russian Wiktionary, * @param foreign_lang foreign language XX * */ /*public static void insert (Connect connect, String foreign_word, boolean foreign_has_definition, String native_page_title, LanguageType native_lang, LanguageType foreign_lang ) { if(foreign_lang == native_lang) return; StringBuilder str_sql = new StringBuilder(); boolean b_native_word = null != native_page_title && native_page_title.length() > 0; String table_name = "`index_" + foreign_lang.toTablePrefix() + "`"; if(b_native_word) str_sql.append("INSERT INTO "+table_name+" (foreign_word,foreign_has_definition,native_page_title) VALUES (\""); else str_sql.append("INSERT INTO "+table_name+" (foreign_word,foreign_has_definition) VALUES (\""); String safe_title = PageTableBase.convertToSafeStringEncodeToDBWunderscore( connect, foreign_word); str_sql.append(safe_title); str_sql.append("\","); str_sql.append(foreign_has_definition); if(b_native_word) { str_sql.append(",\""); safe_title = PageTableBase.convertToSafeStringEncodeToDBWunderscore( connect, native_page_title); str_sql.append(safe_title); str_sql.append("\""); } //System.out.println(" foreign_word=" + foreign_word + // "; foreign_has_definition=" + foreign_has_definition + // "; native_page_title=" + native_page_title + // "\n where SQL=" + str_sql + // " (IndexForeign.insert)"); str_sql.append(")"); try { Statement s = connect.conn.createStatement (); try { s.executeUpdate (str_sql.toString()); } finally { s.close(); } }catch(SQLException ex) { System.err.println("SQLException (IndexForeign.insert()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } }*/ /** Inserts a record into the table 'index_XX' only if a pair * (foreign_word,native_page_title) is absent. * * // If foreign_word == native_page_title then insert only (foreign_word,NULL). * * @param foreign_word word in foreign language XX * @param foreign_has_definition true, if there is any definition in the Wiktionary article with the title foreign_word * @param native_page_title the corresponded page.page_title of the * Wiktionary article in native language (it could be null) * @param native_lang native language in the Wiktionary, * e.g. Russian language in Russian Wiktionary * @param foreign_lang foreign language XX * */ /*public static void insertIfAbsent (Connect conn, String foreign_word, boolean foreign_has_definition, String native_page_title, LanguageType native_lang, LanguageType foreign_lang) { // if(null != native_page_title && 0 == foreign_word.compareTo(native_page_title)) // native_page_title = null; // then insert only (foreign_word,NULL) // since we want skip cases, e.g. : // word (de) -> word (en) if(native_lang == foreign_lang) { System.out.println("Error (IndexForeign.insertIfAbsent()):: native_lang == foreign_lang, It's possible that Wiktionary article about foreign word contains translation section! foreign_word='" + foreign_word + "'; native_page_title = '" + native_page_title + "'"); return; } if(!IndexForeign.has( conn, foreign_word, native_page_title, foreign_lang)) { insert (conn,foreign_word, foreign_has_definition, native_page_title, native_lang, foreign_lang); } }*/ /** Checks whether exists any row in the table 'index_foreign' (index_XX) * with a pair (foreign_word, native_page_title).<br><br> * * @param native_page_title title in native language of Wiktionary * article, it could be NULL */ public static boolean has (SQLiteDatabase db, String foreign_word, String native_page_title, LanguageType foreign_lang) { return 0 != count (db, foreign_word, native_page_title, foreign_lang); } /** Counts number of rows from the table 'index_foreign' (index_XX) * with a pair (foreign_word, native_page_title).<br><br> * * select COUNT(*) AS size from index_en WHERE foreign_word="water" AND native_page_title is NULL; * or * select COUNT(*) AS size from index_en WHERE foreign_word="water" AND native_page_title="вода"; * * @param native_page_title title in native language of Wiktionary * article, it could be NULL */ public static int count (SQLiteDatabase db, String foreign_word, String native_page_title, LanguageType foreign_lang) { if(null == foreign_word || foreign_word.length() == 0) return -1; int size = 0; final String table_name = "`index_" + foreign_lang.toTablePrefix() + "`"; StringBuilder str_sql = new StringBuilder(); str_sql.append("select COUNT(*) from ").append(table_name).append(" WHERE foreign_word=\""); str_sql.append(foreign_word); str_sql.append("\""); if(null == native_page_title) { // select COUNT(*) AS size from index_en WHERE foreign_word="water" AND native_page_title is NULL; str_sql.append(" AND native_page_title is NULL"); } else { // select COUNT(*) AS size from index_uk WHERE foreign_word="water" AND native_page_title="вода"; str_sql.append(" AND native_page_title=\""); str_sql.append(native_page_title); str_sql.append("\""); } Cursor c = db.rawQuery(str_sql.toString(), null); if (c.moveToFirst()) size = c.getInt(0); if (c != null && !c.isClosed()) { c.close(); } return size; } /** Counts number of foreign parts of speech (POS) in the table * 'index_foreign' (index_XX). * * @param foreign_lang language code (XX) */ public static int countNumberOfForeignPOS (SQLiteDatabase db, LanguageType foreign_lang) { return countNativePageTitleIsNull(db, foreign_lang, true); } /** Counts number of translations in the table 'index_foreign' (index_XX). * * @param foreign_lang language code (XX) */ public static int countTranslations (SQLiteDatabase db, LanguageType foreign_lang) { return countNativePageTitleIsNull(db, foreign_lang, false); } /** Counts number of rows from the table 'index_foreign' (index_XX) * with a pair (foreign_word, native_page_title).<br><br> * * SELECT COUNT(*) FROM index_en WHERE native_page_title is NULL; * or * SELECT COUNT(*) FROM index_en WHERE native_page_title is not NULL; * * @param native_page_title title in native language of Wiktionary * article, it could be NULL * @param is_null defines "is NULL" or "is not NULL" SQL parameter 'native_page_title' */ private static int countNativePageTitleIsNull (SQLiteDatabase db, LanguageType foreign_lang, boolean is_null) { int size = 0; final String table_name = "`index_" + foreign_lang.toTablePrefix() + "`"; StringBuilder str_sql = new StringBuilder(); str_sql.append("SELECT COUNT(*) from ").append(table_name).append(" WHERE native_page_title is "); if(is_null) { // SELECT COUNT(*) FROM index_en WHERE native_page_title is NULL; str_sql.append("NULL"); } else { // SELECT COUNT(*) FROM index_en WHERE native_page_title is not NULL; str_sql.append("not NULL"); } Cursor c = db.rawQuery(str_sql.toString(), null); if (c.moveToFirst()) size = c.getInt(0); if (c != null && !c.isClosed()) { c.close(); } return size; } /** Selects rows from the table 'index_foreign' by the prefix of foreign word.<br><br> * * SELECT foreign_word,native_page_title FROM index_en WHERE foreign_word LIKE 'water-%'; * * @param prefix_foreign_word the begining of the page_titles * @param limit constraint of the number of rows returned, * if it's negative then a constraint is omitted * @param native_lang native language in the Wiktionary, * e.g. Russian language in Russian Wiktionary * @param b_meaning return articles with definitions (constraint) * @param b_sem_rel return articles with semantic relations * * @return array of words started from the prefix (empty array if they are absent) */ public static IndexForeign[] getByPrefixForeign ( SQLiteDatabase db, String prefix_foreign_word, int limit, LanguageType native_lang, LanguageType foreign_lang, boolean b_meaning, boolean b_sem_rel ) { if(foreign_lang == native_lang || 0==limit) return NULL_INDEXFOREIGN_ARRAY; List<IndexForeign> if_list = null; //String table_name = "`index_" + foreign_lang.toTablePrefix() + "`"; String table_name = "index_" + foreign_lang.toTablePrefix(); String str_limit = ""; int limit_with_reserve = limit; if( limit_with_reserve > 0) { if(b_meaning) limit_with_reserve += 142; // since some words without meaning will be skipped if(b_sem_rel) limit_with_reserve += 1312; // since some words without relations will be skipped str_limit = "" + limit_with_reserve; } StringBuilder s_where = new StringBuilder(); if(null != prefix_foreign_word && prefix_foreign_word.length() > 0) { s_where.append("foreign_word LIKE \"" + prefix_foreign_word + "%\""); } // SELECT foreign_word,foreign_has_definition,native_page_title FROM index_en WHERE foreign_word LIKE 'water-%'; Cursor c; c = db.query(table_name, new String[] { "foreign_word", "foreign_has_definition", "native_page_title"}, s_where.toString(), null, null, null, null, str_limit); if (c.moveToFirst()) { do { int i_foreign_word = c.getColumnIndexOrThrow("foreign_word"); int i_foreign_has_definition = c.getColumnIndexOrThrow("foreign_has_definition"); int i_native_page_title = c.getColumnIndexOrThrow("native_page_title"); String _foreign_word = c.getString(i_foreign_word); boolean _foreign_has_definition = 0 != c.getInt(i_foreign_has_definition); String _native_page_title = c.getString(i_native_page_title); boolean b_add = true; if(b_meaning) // filter: words only with definitions b_add = b_add && _foreign_has_definition; if (b_add) { TPage _foreign_page = null; if(_foreign_has_definition) _foreign_page = TPage.get(db, _foreign_word); if(b_sem_rel) { if(!_foreign_has_definition || null == _foreign_page) { b_add = false; } else { _foreign_page.setLangPOS( TLangPOS.getRecursive(db, _foreign_page) ); // fills property: .foreign_page.hasSemanticRelation() } b_add = b_add && _foreign_has_definition && _foreign_page.hasSemanticRelation(); } if (b_add) { TPage _native_page = null; if(null != _native_page_title && _native_page_title.length() > 0) { _native_page = TPage.get(db, _native_page_title); } IndexForeign _if = new IndexForeign( _foreign_page, _foreign_word, _native_page); if(null == if_list) if_list = new ArrayList<IndexForeign>(); if_list.add(_if); //System.out.println("IndexForeign:getByPrefixForeign foreign_word=" + foreign_word + // "; foreign_has_definition=" + foreign_has_definition + // "; native_page_title=" + native_page_title); } } } while (c.moveToNext() && (limit_with_reserve < 0 || null == if_list || if_list.size() < limit_with_reserve)); } if (c != null && !c.isClosed()) { c.close(); } if(null == if_list) return NULL_INDEXFOREIGN_ARRAY; return ((IndexForeign[])if_list.toArray(NULL_INDEXFOREIGN_ARRAY)); } /** Deletes a record from the table 'index_XX'.<br><br> * * DELETE FROM index_en WHERE foreign_word="water12" AND native_page_title="ru_water12"; * or * DELETE FROM index_en WHERE foreign_word="water12" AND native_page_title=NULL; * * @param foreign_word word in foreign language XX * @param native_page_title the corresponded page.page_title of the * Wiktionary article in native language (it could be null) * @param native_lang native language in the Wiktionary, * e.g. Russian language in Russian Wiktionary, * @param foreign_lang foreign language XX */ /*public static void delete (Connect connect, String foreign_word, String native_page_title, LanguageType native_lang, LanguageType foreign_lang ) { if(foreign_lang == native_lang) return; boolean b_native_word = null != native_page_title && native_page_title.length() > 0; StringBuilder str_sql = new StringBuilder(); String table_name = "`index_" + foreign_lang.toTablePrefix() + "`"; str_sql.append("DELETE FROM ").append(table_name).append(" WHERE foreign_word=\""); String safe_title = PageTableBase.convertToSafeStringEncodeToDBWunderscore( connect, foreign_word); str_sql.append(safe_title).append("\""); str_sql.append(" AND native_page_title="); if(b_native_word) { safe_title = PageTableBase.convertToSafeStringEncodeToDBWunderscore( connect, native_page_title); str_sql.append("\"").append(safe_title).append("\""); } else str_sql.append("NULL"); try { Statement s = connect.conn.createStatement (); try { s.executeUpdate (str_sql.toString()); } finally { s.close(); } }catch(SQLException ex) { System.err.println("SQLException (IndexForeign.delete()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } }*/ /** Generates tables 'index_XX' for each language code.<br><br> * INSERT INTO index_native (page_id,page_title,has_relation) VALUES (12,"water12",TRUE); * * <PRE> * DROP TABLE IF EXISTS `index_uk` ; * * CREATE TABLE IF NOT EXISTS `index_uk` ( * `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT , * `foreign_word` VARCHAR(255) BINARY NOT NULL COMMENT 'word (in Language \'uk\') found somewhere in the Wiktionary article' , * `native_page_title` VARCHAR(255) BINARY COMMENT 'page.page_title of this Wiktionary article in native language' , * PRIMARY KEY (`id`) , * INDEX `foreign_word` (`foreign_word` ASC) , * INDEX `native_page_title` (`native_page_title` ASC) ) * ENGINE = InnoDB * COMMENT = 'words with this language code (see table postfix)'; * </PRE> * * @see http://code.google.com/p/wikokit/wiki/Wordlist_for_each_language___Database_tables_e_g__index_en?ts=1258826116&updated=Wordlist_for_each_language___Database_tables_e_g__index_en */ /*public static void generateTables (Connect connect, LanguageType native_lang) { Statement s = null; StringBuffer str_sql = new StringBuffer(); try { s = connect.conn.createStatement (); Map<String, LanguageType> code2lang = LanguageType.getAllLanguages(); String s_native_lang = native_lang.toTablePrefix(); for(LanguageType lang_code : code2lang.values()) { if(lang_code.toTablePrefix().equalsIgnoreCase(s_native_lang)) continue; //String table_name = "index_" + lang_code.toStringASCII(); String table_name = "`index_" + lang_code.toTablePrefix() + "`"; str_sql.setLength(0); str_sql.append("DROP TABLE IF EXISTS "+ table_name); s.execute(str_sql.toString()); str_sql.setLength(0); str_sql.append("CREATE TABLE IF NOT EXISTS "+ table_name +" (" + "`id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT," + "`foreign_word` VARCHAR(255) BINARY NOT NULL," + "`foreign_has_definition` TINYINT(1) NOT NULL," + "`native_page_title` VARCHAR(255) BINARY," + "PRIMARY KEY (`id`)," + "INDEX `foreign_word` (`foreign_word` (7) ASC)," + "INDEX `native_page_title` (`native_page_title` (7) ASC)," + "UNIQUE `foreign_native` (`foreign_word` ASC, `native_page_title` ASC) )" + "ENGINE = InnoDB" ); s.execute (str_sql.toString()); } } catch(SQLException ex) { System.err.println("SQLException (IndexForeign.generateTables()): sql='" + str_sql + "' " + ex.getMessage()); } finally { if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; } } }*/ }