/* * Page.java * * Copyright (c) 2005-2008 Andrew Krizhanovsky /aka at mail.iias.spb.su/ * Distributed under GNU Public License. */ package wikipedia.sql_idf; import wikipedia.util.StringUtil; import java.util.*; import java.sql.*; /** Routines to work with the table page in wiki idf database. */ public class Page { /** page (article) identifier */ private int page_id; /** page title */ private String page_title; /** number of words in the article */ private int word_count; /** Gets page's identifier */ public int getPageID() { return page_id; } /** Sets page's identifier */ public void setPageID(int _page_id) { page_id = _page_id; } /** Gets the title of the page */ public String getPageTitle(){ return page_title; } /** Sets the title of the page */ private void setPageTitle( String _page_title) { page_title = _page_title; } /** Gets a number of words in the article */ public int getWordCount() { return word_count; } /** Stores to DB (updates) the number of words in the article, * but the row with the page_title should be in the 'page' table already. */ protected void storeWordCount(java.sql.Connection conn, int _word_count) { word_count = _word_count; update (conn, page_title, _word_count); } /** Sets the number of words in the article. */ protected void setWordCount(int _word_count) { word_count = _word_count; } /** Selects page_id, word_count from the table page by the page_title, * or inserts title and count into the table if the record is absent. * * @param page_title the page's title (used for get and insert) * @param word_count number of words in the article (used for insert) * @return page_id the page's identifier (<> 0) */ public static Page getOrInsert (java.sql.Connection conn,String page_title,int word_count) { Page p = get(conn, page_title); if(null == p || 0 == p.getPageID()) { Page.insert (conn, page_title, word_count); p = Page.get(conn, page_title); } return p; } /** Selects page_id, word_count from the table page by the page_title. * * SQL: * SELECT page_id, word_count FROM page WHERE page_title='bla-bla-bla'; * * @param page_title the page's title * @return page_id the page's identifier, 0 if it's absent. */ public static Page get (java.sql.Connection conn,String page_title) { Page result = null; Statement s = null; ResultSet rs= null; StringBuffer str_sql = new StringBuffer(); try { s = conn.createStatement (); str_sql.append("SELECT page_id,word_count FROM page WHERE page_title=\""); String safe_title = StringUtil.spaceToUnderscore( StringUtil.escapeChars(page_title)); //safe_title = connect.enc.EncodeToDB(safe_title); str_sql.append(safe_title); str_sql.append("\""); s.executeQuery (str_sql.toString()); rs = s.getResultSet (); if (rs.next ()) { result = new Page(); result.page_title = page_title; result.page_id = rs.getInt("page_id"); result.word_count = rs.getInt("word_count"); } } catch(SQLException ex) { System.err.println("SQLException (wikidf Page.java get()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; } } return result; } /** Deletes the record with page_title. */ public static void delete (java.sql.Connection conn,String page_title) { Page p = get(conn, page_title); if(null == p || 0 == p.page_id) { return; } delete(conn, p.page_id); } /** SQL: * DELETE FROM page WHERE page_id=1; */ private static void delete (java.sql.Connection conn,int page_id) { Statement s = null; ResultSet rs= null; int size = 0; StringBuffer str_sql = new StringBuffer(); try { s = conn.createStatement (); str_sql.append("DELETE FROM page WHERE page_id="); str_sql.append(page_id); s.execute (str_sql.toString()); } catch(SQLException ex) { System.err.println("SQLException (sql_idf Page.java delete()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; } } } /** Inserts page title into the table page, only if it is absent in the table. * * SQL example: * INSERT INTO page (page_title,word_count) VALUES ("apple",222); * * @param page_title page title * @param word_count number of words in the article */ public static void insert (java.sql.Connection conn,String page_title,int word_count) { Statement s = null; ResultSet rs= null; StringBuffer str_sql = new StringBuffer(); try { s = conn.createStatement (); str_sql.append("INSERT INTO page (page_title,word_count) VALUES (\""); String safe_title = StringUtil.spaceToUnderscore( StringUtil.escapeChars(page_title)); //safe_title = connect.enc.EncodeToDB(safe_title); str_sql.append(safe_title); str_sql.append("\","); str_sql.append(word_count); str_sql.append(")"); //System.out.println(str_sql.toString() + "; unsafe page_title=" + page_title + "; safe_title=" + safe_title); s.executeUpdate (str_sql.toString()); }catch(SQLException ex) { System.err.println("SQLException (wikipedia.sql_idf Page.java insert()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; } } } /** Updates the value of number of words (word_count) in the table page * identified by the page_title.<br><br> * * SQL example: * UPDATE page SET word_count=222 WHERE page_title="apple"; * * @param page_title the page title * @param word_count new value of number of words on the page */ private static void update (java.sql.Connection conn,String page_title,int word_count) { Statement s = null; ResultSet rs= null; StringBuffer str_sql = new StringBuffer(); try { s = conn.createStatement (); // UPDATE page SET word_count=222 WHERE page_title="apple"; str_sql.append("UPDATE page SET word_count="); str_sql.append(word_count); str_sql.append(" WHERE page_title=\""); String safe_title = StringUtil.spaceToUnderscore( StringUtil.escapeChars(page_title)); //safe_title = connect.enc.EncodeToDB(safe_title); str_sql.append(safe_title); str_sql.append("\""); s.executeUpdate (str_sql.toString()); }catch(SQLException ex) { System.err.println("SQLException (wikipedia.sql_idf Page.java update()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; } } } /** Gets number of pages: <br> * SELECT COUNT(*) AS size FROM page */ public static int countPages(java.sql.Connection conn) { Statement s = null; ResultSet rs= null; int size = 0; String str_sql = null; try { s = conn.createStatement (); str_sql = "SELECT COUNT(*) AS size FROM page"; s.executeQuery (str_sql); rs = s.getResultSet (); if (rs.next ()) { size = rs.getInt("size"); } } catch(SQLException ex) { System.err.println("SQLException (Statistics.java CountPageForNamespace()): sql='" + str_sql + "' " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; } } return size; } /** Selects data into tp_list (title of page and number of words) * from the table page by the page_id. *<PRE> * SQL: * SELECT page_id, page_title, word_count FROM page * WHERE page_id IN (732, 1707, 9912) *</PRE> * @param lemma lemma * @return Term object with initialized fields: term_id, doc_freq, corpus_freq * @see Term.fillTerms similar function (the contrary) */ public static void fillPages (java.sql.Connection conn,List<TermPage> tp_list) { if(null == tp_list || 0 == tp_list.size()) return; Statement s = null; ResultSet rs= null; StringBuffer sb = new StringBuffer(); try { s = conn.createStatement (); sb.append("SELECT page_id,page_title,word_count FROM page WHERE page_id IN ("); // Prepare SQL IN(...) via tp_list[].term.term_id int len = tp_list.size()-1; for (int i=0; i<len; i++) { sb.append(tp_list.get(i).getPageID()); sb.append(","); } sb.append(tp_list.get(len).getPageID()); // skip last comma sb.append(")"); s.executeQuery (sb.toString()); rs = s.getResultSet (); if (rs.next ()) { Map<Integer,Page> m_id_to_page = TermPage.createMapIdPage(tp_list); do { Page p = m_id_to_page.get(rs.getInt("page_id")); p.setPageTitle( rs.getString("page_title")); p.setWordCount( rs.getInt( "word_count")); } while (rs.next()); } } catch(SQLException ex) { System.err.println("SQLException (wikidf Page.java fillPages()):: sql='" + sb.toString() + "' " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; } } } }