/* TPOS.java - SQL operations with the table 'part_of_speech' * in Wiktionary parsed database. * * Copyright (c) 2009-2011 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com> * Distributed under EPL/LGPL/GPL/AL/BSD multi-license. */ package wikokit.base.wikt.sql; import wikokit.base.wikt.constant.POS; //import wikipedia.util.StringUtil; import wikokit.base.wikipedia.sql.Connect; import wikokit.base.wikipedia.sql.UtilSQL; import wikokit.base.wikipedia.sql.Statistics; import java.sql.*; import java.util.Map; import java.util.LinkedHashMap; import java.util.Collection; import java.util.List; import java.util.ArrayList; import java.util.Collections; /** An operations with the table 'part_of_speech' in Wiktionary parsed database. * The table 'part_of_speech' contains a list of POS: name and ID. */ public class TPOS { /** Unique POS identifier. */ private int id; /** Name of part of speech: code and name, e.g. 'ru' and 'Russian'. */ private POS pos; /** Map from ID to part of speech. It is created from data * in the table `part_of_speech`, which is created from data in POS.java.*/ private static Map<Integer, TPOS> id2pos; /** Map from part of speech to ID.*/ private static Map<POS, Integer> pos2id; private final static TPOS[] NULL_TPOS_ARRAY = new TPOS[0]; public TPOS(int _id,POS _pos) { id = _id; pos = _pos; } /** Gets unique ID of this part of speech. */ public int getID() { return id; } /** Gets this part of speech. */ public POS getPOS() { return pos; } /** Gets part of speech (POS with ID) from the table 'lang_pos'.<br><br> * REM: createFastMaps() should be run at least once, before this function execution. */ public static int getIDFast(POS p) { if(null == pos2id) { System.out.println("Error (wikt_parsed TPOS.getIDFast()):: What about calling 'createFastMaps()' before?"); return -1; } if(null == p) { System.out.println("Error (wikt_parsed TPOS.getIDFast()):: argument POS is null"); return -1; } return pos2id.get(p); } /** Gets part of speech by ID from the table 'lang_pos'.<br><br> * REM: createFastMaps() should be run at least once, before this function execution. */ public static TPOS getTPOSFast(int id) { if(null == id2pos) { System.out.println("Error (wikt_parsed TPOS.getTPOSFast()):: What about calling 'createFastMaps()' before?"); return null; } if(id <= 0) { System.out.println("Error (wikt_parsed TPOS.getTPOSFast()):: argument id <=0, id = "+id); return null; } return id2pos.get(id); } /** Gets part of speech from the table 'lang_pos'.<br><br> * REM: createFastMaps() should be run at least once, before this function execution. */ public static TPOS get(POS p) { return getTPOSFast(getIDFast(p)); } /** Read all records from the table 'lang_pos', * fills the internal map from a table ID to POS.<br><br> * * REM: during a creation of Wiktionary parsed database * the functions recreateTable() should be called (before this function). */ public static void createFastMaps(Connect connect) { System.out.println("Loading table `part_of_speech`..."); TPOS[] tpos = getAllTPOS(connect); int size = tpos.length; if(tpos.length != POS.size()) System.out.println("Warning (wikt_parsed TPOS.java createFastMaps()):: POS.size (" + POS.size() + ") is not equal to size of table 'part_of_speech'("+ size +"). Is the database outdated?"); if(null != id2pos && id2pos.size() > 0) id2pos.clear(); if(null != pos2id && pos2id.size() > 0) pos2id.clear(); id2pos = new LinkedHashMap<Integer, TPOS>(size); pos2id = new LinkedHashMap<POS, Integer>(size); for(TPOS t : tpos) { id2pos.put(t.getID(), t); pos2id.put(t.getPOS(), t.getID()); } } /** Gets all records from the table 'part_of_speech'. */ private static TPOS[] getAllTPOS(Connect connect) { int size = Statistics.Count(connect, "part_of_speech"); if(0==size) { System.out.println("Error (wikt_parsed TPOS.java getAllTPOS()):: The table `part_of_speech` is empty!"); return NULL_TPOS_ARRAY; } List<TPOS>tpos_list = new ArrayList<TPOS>(size); Collection<POS> pp = POS.getAllPOS(); for(POS p : pp) { TPOS t = get(connect, p); if(null != t) tpos_list.add(t); } return( (TPOS[])tpos_list.toArray(NULL_TPOS_ARRAY) ); } /** Deletes all records from the table 'part_of_speech', * loads parts of speech names from POS.java, * sorts by name, * fills the table. */ public static void recreateTable(Connect connect) { System.out.println("Recreating the table `part_of_speech`..."); Map<Integer, POS> _id2pos = fillLocalMaps(); UtilSQL.deleteAllRecordsResetAutoIncrement(connect, "part_of_speech"); fillDB(connect, _id2pos); { int db_current_size = wikokit.base.wikipedia.sql.Statistics.Count(connect, "part_of_speech"); assert(db_current_size == POS.size()); // ~ 12 POS } } /** Load data from a POS class, sorts, * and fills the local map 'id2pos'. */ public static Map<Integer, POS> fillLocalMaps() { int size = POS.size(); List<String>list_pos = new ArrayList<String>(size); list_pos.addAll(POS.getAllPOSNames()); Collections.sort(list_pos); // Collections.sort(list_pos, StringUtil.LEXICOGRAPHICAL_ORDER); // OK, we have list of POS names. Sorted list 'list_pos' // Local map from id to POS. It is created from data in POS.java. // It is used to fill the table 'part_of_speech' in right sequence. Map<Integer, POS> _id2pos = new LinkedHashMap<Integer, POS>(size); for(int id=0; id<size; id++) { String s = list_pos.get(id); // s - POS name assert(POS.has(s)); //System.out.println("fillLocalMaps---id="+id+"; s="+s); _id2pos.put(id, POS.get(s)); } return _id2pos; } /** Fills database table 'part_of_speech' by data from POS class. */ public static void fillDB(Connect connect,Map<Integer, POS> id2pos) { for(int id : id2pos.keySet()) insert (connect, id2pos.get(id)); } /** Inserts record into the table 'part_of_speech'.<br><br> * INSERT INTO part_of_speech (name) VALUES ("noun"); * @param name part of speech name, e.g. 'unknown', 'noun' */ public static void insert (Connect connect,POS p) { if(null == p) return; StringBuilder str_sql = new StringBuilder(); try { Statement s = connect.conn.createStatement (); try { str_sql.append("INSERT INTO part_of_speech (name) VALUES (\""); //String safe_title = StringUtil.spaceToUnderscore( // StringUtil.escapeChars(name)); //str_sql.append(safe_title); str_sql.append(p.toString()); str_sql.append("\")"); s.executeUpdate (str_sql.toString()); } finally { s.close(); } }catch(SQLException ex) { System.out.println("SQLException (wikt_parsed TPOS.java insert()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } } /** Selects row from the table 'part_of_speech' by a POS name.<br><br> * SELECT id FROM part_of_speech WHERE name="noun"; * @param POS part of speech class * @return null if a part of speech name is absent in the table 'part_of_speech' */ public static TPOS get (Connect connect,POS p) { if(null == p) return null; StringBuilder str_sql = new StringBuilder(); TPOS tp = null; try { Statement s = connect.conn.createStatement (); try { str_sql.append("SELECT id FROM part_of_speech WHERE name=\""); str_sql.append(p.toString()); str_sql.append("\""); ResultSet rs = s.executeQuery (str_sql.toString()); try { if (rs.next ()) tp = new TPOS(rs.getInt("id"), p); else System.out.println("Warning: (TPOS.get()):: POS (" + p.toString() + ") is absent in the table 'part_of_speech'."); } finally { rs.close(); } } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (TPOS.get()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } return tp; } /** Deletes row from the table 'part_of_speech' by the POS name.<br><br> * DELETE FROM part_of_speech WHERE name="unknown"; * @param p POS to be deleted */ public static void delete (Connect connect,POS p) { if(null == p) return; StringBuilder str_sql = new StringBuilder(); try { Statement s = connect.conn.createStatement (); try { str_sql.append("DELETE FROM part_of_speech WHERE name=\""); str_sql.append(p.toString()); str_sql.append("\""); s.execute (str_sql.toString()); } finally { s.close(); } } catch(SQLException ex) { System.out.println("SQLException (wikt_parsed TPOS.java delete()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } } }