package com.personalityextractor.store;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.personalityextractor.entity.WikipediaEntity;
public class WikiminerDB {
private static MysqlStore db = null;
private static WikiminerDB instance = null;
private static String host;
private static String user;
private static String passwd;
private static String database;
private WikiminerDB(String host, String user, String passwd, String database)
throws Exception {
db = new MysqlStore(host, user, passwd, database);
}
public static WikiminerDB getInstance(String host, String user,
String passwd, String database) throws Exception {
if (instance == null) {
WikiminerDB.database = database;
WikiminerDB.user = user;
WikiminerDB.passwd = passwd;
WikiminerDB.host = host;
instance = new WikiminerDB(host, user, passwd, database);
}
return instance;
}
public static WikiminerDB getInstance() {
return instance;
}
public List<WikipediaEntity> search(String terms) {
List<WikipediaEntity> entities = new ArrayList<WikipediaEntity>();
String query = "SELECT page_id, page_title, page_type, inlinks FROM page_indexed WHERE MATCH(page_title) AGAINST('"
+ terms.toLowerCase()
+ "' IN BOOLEAN MODE) AND page_type = 1 ORDER BY inlinks desc limit 20";
ResultSet rs = db.execute(query);
try {
while (rs.next()) {
String id = rs.getString("page_id");
String title = rs.getString("page_title");
String inlinks = rs.getString("inlinks");
int type = Integer.valueOf(rs.getString("page_type"));
entities.add(new WikipediaEntity(title, id, type, inlinks));
}
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
if (rs != null) {
try {
rs.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
return entities;
}
public double compare(String id1, String id2) {
// System.out.println("comparing " + id1 + ":" + id2);
// System.gc();
double sim = 0;
if (id1 == null || id2 == null || id1.equals("") || id2.equals("")) {
return sim;
}
Set<String> categories1 = new HashSet<String>();
Set<String> categories2 = new HashSet<String>();
String query1 = "SELECT cl_parent FROM categorylink WHERE cl_child = "
+ id1 + " LIMIT 200";
String query2 = "SELECT cl_parent FROM categorylink WHERE cl_child = "
+ id2 + " LIMIT 200";
// System.out.println(query1);
// System.out.println(query2);
ResultSet rs = db.execute(query1);
try {
while (rs.next()) {
String id = rs.getString("cl_parent");
categories1.add(id);
}
rs.close();
db.closeStmt();
} catch (Exception e) {
e.printStackTrace();
return 0;
}
rs = db.execute(query2);
try {
while (rs.next()) {
String id = rs.getString("cl_parent");
categories2.add(id);
}
rs.close();
db.closeStmt();
} catch (Exception e) {
e.printStackTrace();
return 0;
}
double intersection = 0;
for (String id11 : categories1) {
if (categories2.contains(id11)) {
intersection++;
}
}
if (intersection > 0 && (categories1.size() + categories2.size()) > 0) {
sim = intersection * 2 / (categories1.size() + categories2.size());
}
return sim;
}
public void populateInLinks(int index) {
ResultSet rs = null;
int refresh = 0;
do {
String query = "SELECT * from pagelink_in LIMIT " + index
+ ", 1000";
try {
rs = db.execute(query);
while (rs.next()) {
String id = rs.getString("li_id");
String data = rs.getString("li_data");
String[] ids = data.split(":");
db.executeUpdate("UPDATE page_indexed set inlinks = "
+ (ids.length - 1) + " WHERE page_id = " + id);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if (rs != null) {
try {
rs.close();
db.closeStmt();
} catch (Exception e) {
e.printStackTrace();
}
}
}
System.out.println(index);
// if(index%1000000 == 0 && refresh != 0) {
// break;
// }
index += 1000;
refresh++;
if (refresh == 5) {
// this.reconnect();
}
} while (index < 6700000);
}
public List<WikipediaEntity> getCategories(String id) {
List<WikipediaEntity> categories = new ArrayList<WikipediaEntity>();
String query = "SELECT page_id, page_title, page_type, inlinks FROM categorylink, page_indexed WHERE cl_child = "
+ id + " AND page_id = cl_parent LIMIT 50";
ResultSet rs = db.execute(query);
try {
while (rs.next()) {
String cid = rs.getString("page_id");
String ctitle = rs.getString("page_title");
int type = Integer.valueOf(rs.getString("page_type"));
categories.add(new WikipediaEntity(ctitle, cid, type));
}
rs.close();
db.closeStmt();
} catch (Exception e) {
e.printStackTrace();
}
for(WikipediaEntity c : categories) {
query = "SELECT page_type, page_title, MATCH(page_title) AGAINST(\"" + c.getText() + "\") as relevance" +
" FROM page_indexed WHERE MATCH(page_title) AGAINST(\"" + c.getText() + "\") " +
"AND page_type = 1 order by relevance desc limit 25";
rs = db.execute(query);
try {
while(rs.next()) {
String title = rs.getString("page_title");
if(title.equalsIgnoreCase(c.getText())) {
c.setType(1);
break;
}
}
rs.close();
db.closeStmt();
} catch(Exception e) {
e.printStackTrace();
}
}
return categories;
}
private boolean reconnect() {
this.db = null;
try {
db = new MysqlStore(host, user, passwd, database);
} catch (Exception e) {
e.printStackTrace();
return false;
}
System.gc();
return true;
}
}