/* TLabel.java - SQL operations with the table 'label' in Wiktionary * parsed database. * * Copyright (c) 2013 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com> * Distributed under EPL/LGPL/GPL/AL/BSD multi-license. */ package wikokit.base.wikt.sql.label; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import wikokit.base.wikipedia.language.Encodings; import wikokit.base.wikipedia.language.LanguageType; import wikokit.base.wikipedia.sql.Connect; import wikokit.base.wikipedia.sql.PageTableBase; import wikokit.base.wikipedia.sql.Statistics; import wikokit.base.wikipedia.sql.UtilSQL; import wikokit.base.wikt.constant.Label; import wikokit.base.wikt.constant.LabelCategory; import wikokit.base.wikt.multi.en.name.LabelEn; import wikokit.base.wikt.multi.ru.name.LabelRu; import wikokit.base.wikt.sql.TLang; import wikokit.base.wikt.sql.TMeaning; /** An operations with the table 'label' (context labels) in MySQL Wiktionary_parsed database. * * Attention: LabelEn used instead of Label, because there is need in LabelCategory * (category_id in the table label). */ public class TLabel { /** Unique identifier in the table 'label'. */ private int id; /** Context label short name. */ private String short_name; /** Context label full name. */ private String name; /** Category of context label (category_id). NULL means that label_category is unknown. * * A] The label was gathered automatically by parser if: * * (1) category_id is NULL or (2) category_id = "regional automatic". * * B] The label was added manually to the code of parser if * (1) category_id is not NULL and (2) category_id != "regional automatic". */ private TLabelCategory label_category; /** Number of definitions with this context label. */ private int counter; /** Map from label to ID.*/ private static Map<Label, Integer> label2id; /** Map from ID to label.*/ private static Map<Integer, Label> id2label; /** Gets the map from label to ID (ID in the table 'label'). * * REM: the functions createFastMaps() should be run at least once, * before this function execution. */ public static Map<Label, Integer> getAllLabels2ID() { return label2id; } /** Gets the map from label ID (ID in the table 'label') to label. * * REM: the functions createFastMaps() should be run at least once, * before this function execution. */ public static Map<Integer, Label> getAllID2Labels() { return id2label; } /** Gets label by ID from the table 'label'.<br><br> * REM: createFastMaps() should be run at least once, before this function execution. */ public static Label getLabelFast(int id) { if(null == id2label) { System.out.println("Error (wikt_parsed TLabel.getTLabelFast()):: What about calling 'createFastMaps()' before?"); return null; } if(id <= 0) { System.out.println("Error (wikt_parsed TLabel.getTLabelFast()):: argument id <=0, id = "+id); return null; } return id2label.get(id); } /** Gets SQL returning labels found automatically: * (1) category_id is NULL or (2) category_id = "regional automatic". */ private static String getSQLWhereCategoryId_FoundByParser() { int cat_id = TLabelCategory.getIDFast(LabelCategory.regional_automatic); return "WHERE category_id is NULL OR category_id="+cat_id; } private static String getSQLWhereCategoryId_AddedByHand() { int cat_id = TLabelCategory.getIDFast(LabelCategory.regional_automatic); return "WHERE (category_id IS NOT NULL) AND (category_id <> "+cat_id+")"; } /** Gets all labels from the table 'label' of database, * which were found automatically: (1) category_id is NULL or (2) category_id = "regional automatic". */ public static Collection<Label> getLabelsFoundByParserFromDatabase(Connect wikt_parsed_conn, LanguageType native_lang) { Statement s = null; ResultSet rs= null; Collection<Label> result = new ArrayList<>(); try { s = wikt_parsed_conn.conn.createStatement(); StringBuilder str_sql = new StringBuilder(); str_sql.append("SELECT short_name, category_id FROM label "); str_sql.append( TLabel.getSQLWhereCategoryId_FoundByParser() ); s.executeQuery (str_sql.toString()); rs = s.getResultSet (); while (rs.next ()) { String short_name = Encodings.bytesToUTF8(rs.getBytes("short_name")); LabelCategory _lc = TLabelCategory.getLabelCategoryFast( rs.getInt("category_id") ); Label _label = null; LanguageType l = native_lang; if(l == LanguageType.ru) { _label = new LabelRu(short_name); _label.setCategory(_lc); } else if(l == LanguageType.en) { _label = new LabelEn(short_name, _lc); } /*if(null == _lc) { _label = new LabelRu(short_name); } else { _label = new LabelEn(short_name, _lc); }*/ result.add(_label); } } catch(SQLException ex) { System.out.println("SQLException (TLabel.getLabelsFoundByParserFromDatabase()): " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; } } return result; } /** Read all records from the table 'label', * fills the internal map from a table ID to a label.<br><br> * * REM: during a creation of Wiktionary parsed database * the functions recreateTable() should be called (before this function). */ public static void createFastMaps(Connect connect, LanguageType wikt_lang) { System.out.println("Loading table `label`..."); int size = Statistics.Count(connect, "label"); if(0==size) { System.out.println("Error (wikt_parsed TLabel.createFastMaps()):: The table `label` is empty!"); return; } if(null != label2id && label2id.size() > 0) label2id.clear(); if(null != id2label && id2label.size() > 0) id2label.clear(); label2id = new LinkedHashMap<>(size); id2label = new LinkedHashMap<>(size); // get labels found by parser TLabel.getLabelsFoundByParserFromDatabase(connect, wikt_lang); // labels found by parser + labels by hand Collection<Label> labs = Label.getAllLabels(wikt_lang); for(Label label : labs) { String short_name = label.getShortName(); int id = getIDByShortName(connect, short_name, "createFastMaps"); if (0 == id) { System.out.println("Error (wikt_parsed TLabel.createFastMaps()):: There is label short name='"+short_name+"' which is absent in table 'label'!"); continue; } label2id.put(label, id); id2label.put(id, label); } int labels_in_class = Label.size(wikt_lang); if(size != labels_in_class) System.out.println("Warning (wikt_parsed TLabel.createFastMaps()):: Label.size (" + labels_in_class + ") is not equal to size of table 'label'("+ size +"). Is the database outdated?"); } /** Deletes all records from the table 'label', * loads labels names from LabelXX.java (XX is parameter wikt_lang), * sorts by name, * fills the table. */ public static void recreateTable(Connect connect, Map<LabelCategory, Integer> category2id, LanguageType wikt_lang) { System.out.println("Recreating the table `label`..."); Map<Integer, Label> _id2label = fillLocalMaps(wikt_lang); UtilSQL.deleteAllRecordsResetAutoIncrement(connect, "label"); fillDB(connect, _id2label, category2id); { int db_current_size = wikokit.base.wikipedia.sql.Statistics.Count(connect, "label"); assert(db_current_size == Label.size(wikt_lang)); // ~ ??? labels entered by hand } } /** Load data from a Label class, sorts, * and fills the local map 'id2label'. * * @param wikt_lang defines LabelXX.java where XX is ru, en, de, fr... */ public static Map<Integer, Label> fillLocalMaps(LanguageType wikt_lang) { int size = Label.size(wikt_lang); List<String>list_label = new ArrayList<>(size); list_label.addAll(Label.getAllLabelShortNames(wikt_lang)); Collections.sort(list_label); // OK, we have list of labels (short names). Sorted list 'list_label' // Local map from id to Label. It is created from data in LabelXX.java or LabelRu.java // It is used to fill the table 'label' in right sequence. Map<Integer, Label> _id2label = new LinkedHashMap<>(size); for(int id=0; id<size; id++) { String s = list_label.get(id); // s - context label name assert(Label.hasShortName(s, wikt_lang)); //System.out.println("fillLocalMaps---id="+id+"; s="+s); _id2label.put(id+1, Label.getByShortName(s, wikt_lang)); // ID in MySQL starts from 1 } return _id2label; } /** Fills database table 'label' by data from LabelXX class. */ public static void fillDB( Connect connect, Map<Integer, Label> id2label, Map<LabelCategory, Integer> category2id) { for(int id : id2label.keySet()) { Label label = id2label.get(id); if(null == label) continue; LabelCategory lc = label.getLinkedLabelEn().getCategory(); int category_id = category2id.get(lc); // short_name, name, category_id, counter (default 0) insert (connect, label.getShortName(), label.getName(), category_id); } } /** Calculates number of labels in the table 'label_meaning', * stores statistics to the field: 'label.counter'. <br><br> * * REM: this func should be called after the a creation of Wiktionary * parsed database, and the tables should be filled with data. * * @param native_lang native language in the Wiktionary, * e.g. Russian language in Russian Wiktionary */ public static void calcCounterStatistics(Connect connect, LanguageType native_lang) { System.out.println("Fill `label.counter` by statistics from the table 'label_meaning' ..."); Map<Integer, Label> _id2label = TLabel.getAllID2Labels(); for(int label_id : _id2label.keySet()) { int counter = TLabelMeaning.countRecordsWithLabelID(connect, label_id); // SELECT COUNT(*) FROM label_meaning WHERE label_id = 3; update(connect, label_id, counter); // UPDATE label SET counter=7 WHERE id=3; } } /** Counts number of labels added by hand. <br><br> * * SELECT COUNT(*) FROM label WHERE category_id correspond to labels added by hand; * * @return 0 means error **/ public static int countLabelsAddedByHand(Connect connect) { Statement s = null; ResultSet rs= null; int size = 0; StringBuilder str_sql = new StringBuilder(); if(null==connect || null==connect.conn) return 0; try { s = connect.conn.createStatement (); str_sql.append("SELECT COUNT(*) AS size FROM label "); str_sql.append( TLabel.getSQLWhereCategoryId_AddedByHand() ); rs = s.executeQuery (str_sql.toString()); if (rs.next ()) size = rs.getInt("size"); } catch(SQLException ex) { System.out.println("SQLException (TLabel.countLabelsAddedByHand()): sql='" + str_sql.toString() + "' " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } } } return size; } /** Counts number of labels found by parser (automatically). <br><br> * * select COUNT(*) FROM label WHERE category_id was found by parser; * * @return 0 means error **/ public static int countLabelsFoundByParser(Connect connect) { Statement s = null; ResultSet rs= null; int size = 0; StringBuilder str_sql = new StringBuilder(); if(null==connect || null==connect.conn) return 0; try { s = connect.conn.createStatement (); str_sql.append("SELECT COUNT(*) AS size FROM label "); str_sql.append( TLabel.getSQLWhereCategoryId_FoundByParser() ); rs = s.executeQuery (str_sql.toString()); if (rs.next ()) size = rs.getInt("size"); } catch(SQLException ex) { System.out.println("SQLException (TLabel.countLabelsAddedByHand()): sql='" + str_sql.toString() + "' " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } } } return size; } /** Inserts record into the table 'label'.<br><br> * INSERT INTO label (short_name, name, category_id) VALUES ("short name", "name", 1); * @param short_name label itself * @param name full name of label * @param category_id ID of label category from the table 'label_category' */ public static int insert (Connect connect,String short_name, String name, int category_id) { if(null == short_name || short_name.length() == 0) return 0; int result_id = 0; StringBuilder str_sql = new StringBuilder(); try { try (Statement s = connect.conn.createStatement ()) { str_sql.append("INSERT INTO label (short_name, name, category_id) VALUES (\""); String safe_title = PageTableBase.convertToSafeStringEncodeToDBWunderscore(connect, short_name); str_sql.append(safe_title); str_sql.append("\",\""); safe_title = PageTableBase.convertToSafeStringEncodeToDBWunderscore(connect, name); str_sql.append(safe_title); str_sql.append("\","); str_sql.append(category_id); str_sql.append(")"); s.executeUpdate (str_sql.toString(), Statement.RETURN_GENERATED_KEYS); ResultSet rs = s.getGeneratedKeys(); if (rs.next()){ result_id = rs.getInt(1); } } }catch(SQLException ex) { System.out.println("SQLException (wikt_parsed TLabel.insert(with category_id)):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } return result_id; } /** Inserts record into the table 'label', gets last inserted ID.<br><br> * INSERT INTO label (short_name, name) VALUES ("short name", "name"); * @param short_name label itself * @param name full name of label * * @return last inserted ID, 0 means error */ public static int insert (Connect connect,String short_name, String name) { if(null == short_name || short_name.length() == 0) return 0; int result_id = 0; StringBuilder str_sql = new StringBuilder(); try { try (Statement s = connect.conn.createStatement ()) { str_sql.append("INSERT INTO label (short_name, name) VALUES (\""); String safe_title = PageTableBase.convertToSafeStringEncodeToDBWunderscore(connect, short_name); str_sql.append(safe_title); str_sql.append("\",\""); safe_title = PageTableBase.convertToSafeStringEncodeToDBWunderscore(connect, name); str_sql.append(safe_title); str_sql.append("\")"); s.executeUpdate (str_sql.toString(), Statement.RETURN_GENERATED_KEYS); ResultSet rs = s.getGeneratedKeys(); if (rs.next()){ result_id = rs.getInt(1); } } }catch(SQLException ex) { System.out.println("SQLException (wikt_parsed TLabel.insert(without category_id)):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } return result_id; } /** Selects ID from the table 'label' by a label short name.<br><br> * SELECT id FROM label WHERE short_name="context"; * @param short_name name of label category * @return 0 if a label name is empty in the table 'label_category' */ public static int getIDByShortName (Connect connect,String short_name,String page_title) { if(null == short_name || short_name.isEmpty()) return 0; int result_id = 0; StringBuilder str_sql = new StringBuilder(); try { Statement s = connect.conn.createStatement (); try { str_sql.append("SELECT id FROM label WHERE short_name=\""); String safe_title = PageTableBase.convertToSafeStringEncodeToDBWunderscore(connect, short_name); str_sql.append(safe_title); str_sql.append("\""); try (ResultSet rs = s.executeQuery (str_sql.toString())) { if (rs.next ()) result_id = rs.getInt("id"); //else // too many to print System.out.println("Warning: (TLabel.getIDByShortName()):: name '" + short_name + "' is absent in the table 'label', page_title="+page_title+"."); } } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (TLabel.getIDByShortName()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } return result_id; } /** Updates values (n_foreign_POS, n_translations) in the table 'lang'. <br><br> * * UPDATE label SET counter=7 WHERE id=3; */ public static void update (Connect connect,int label_id,int counter) { StringBuilder str_sql = new StringBuilder(); try { Statement s = connect.conn.createStatement (); try { // UPDATE label SET counter=7 WHERE id=3; str_sql.append("UPDATE label SET counter="); str_sql.append(counter); str_sql.append(" WHERE id="); str_sql.append(label_id); s.executeUpdate (str_sql.toString()); } finally { s.close(); } }catch(SQLException ex) { System.out.println("SQLException (TLabel.update()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } } /** Stores context labels related to this meaning into table: * 'label_meaning'. New labels will be stored to the table 'label' automatically. * * @param page_title word described in this article * @param _meaning corresponding record in table 'meaning' to this relation * @param lang language of this meaning * @param _labels labels extracted from the definition of this meaning */ public static void storeToDB (Connect connect,String page_title, TMeaning _meaning, TLang _lang, Label[] _labels) { if(null == _meaning || _labels.length == 0) return; for(Label la : _labels) { // 1. if 'la' is new label then add 'la' to the table 'label' (label.added_by_hand = false). int label_id = TLabel.getIDByShortName(connect, la.getShortName(), page_title); if(0 == label_id) { LabelCategory la_category = LabelEn.getCategoryByLabel(la); if(la_category == LabelCategory.regional) { // this is new regional label found by parser (it is absent in the official regional labels list) int cat_id = TLabelCategory.getIDFast(LabelCategory.regional_automatic); label_id = TLabel.insert (connect, la.getShortName(), la.getName(), cat_id); } else { label_id = TLabel.insert (connect, la.getShortName(), la.getName()); } } // 2. add to the table 'label_meaning' the record (la.label_id, _meaning.id) TLabelMeaning.insert( connect, page_title, label_id, _meaning.getID()); } } }