/* * CatCount.java * * Created on September 10, 2007, 10:54 AM * * To change this template, choose Tools | Template Manager * and open the template in the editor. */ package wikipedia.experiment; import wikipedia.sql.Connect; import java.util.List; import java.sql.Statement; import java.sql.ResultSet; import java.sql.SQLException; /** Routines to work with the table cat_count created in wiki database. * The table cat_count stores data about number of articles, sub-categories, * and hyponyms for each category in wiki. * * Table cat_count could be built by MySQL stored procedure * http://meta.wikimedia.org/wiki/Hyponyms (todo) */ public class CatCount { /** category page identifier */ private int page_id; /** category page title */ private String page_title; /** The depth of a node n is the length of the path from the root to the * node. The root node is at depth zero.*/ //private int n_depth; /** Number of direct sub-categories (childrens). It is zero for * category-leaf.*/ //private int n_subcat; /** Number of articles which have this category.*/ //private int n_articles; /** n_subcat + n_articles + n_hyponyms_of_sub-categories */ //private int n_hyponyms; /** Infromation content.*/ private float ic; /** Creates a new instance of CatCount */ public CatCount() {} /** Gets category page title. */ public String getPageTitle () { return page_title; }; /** Gets category page Information Content. */ public float getInformationContent() { return ic; }; /** Selects row with maximum ic from the table cat_count by the page_title. * * SQL (SimpleWiki): * SELECT page_id,page_title, ic FROM cat_count WHERE page_title IN ("Internet_slang", "Websites"); * * @param titles array of titles of page to select from * @return CatCount object with initialized fields, null if titles is null */ public static CatCount getMaxIC(java.sql.Connection conn, String[] titles) { CatCount result = null; CatCount cur = new CatCount(); if(null == titles || 0 == titles.length) return null; Statement s = null; ResultSet rs= null; StringBuffer str_sql = new StringBuffer(); try { s = conn.createStatement (); //SELECT page_id,page_title,ic FROM cat_count WHERE page_title IN ("Internet_slang", "Websites"); str_sql.append("SELECT page_id,page_title,ic FROM cat_count WHERE page_title IN (\""); // Prepare SQL IN(...) by data from titles[] for (int i=0; i<titles.length-1 && titles.length>0; i++) { str_sql.append(titles[i]); str_sql.append("\",\""); } str_sql.append(titles[ titles.length-1 ]); // skip last comma str_sql.append("\")"); s.executeQuery (str_sql.toString()); rs = s.getResultSet (); while (rs.next ()) { cur.ic = rs.getFloat("ic"); if(null == result || cur.ic > result.ic) { if(null == result) { result = new CatCount(); } /*if(cur.ic < 0) { // example 1: // Gender -> Category: Biology -> Science -> ... Project (has IC=-1) // Equality -> Category: Disambiguation -> Wikipedia -> Project result.ic = 0; } else { */ result.ic = cur.ic; //} result.page_id = rs.getInt("page_id"); result.page_title = rs.getString("page_title"); } } } catch(SQLException ex) { System.err.println("wikipedia.experiment SQLException ( CatCount.java getMaxIC()):: sql='" + str_sql.toString() + "' " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; } } return result; } }