package org.plista.kornakapi.core.storage;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import org.apache.commons.dbcp.BasicDataSource;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.common.IOUtils;
import org.plista.kornakapi.core.config.StorageConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Reads the user/item data used for streaming k-means from the
 * {@code taste_preferences} table.
 *
 * <p>Users are selected by their number of rows in {@code taste_preferences};
 * for every selected user the ids of all items that user has a row for are
 * loaded, together with the union of those item ids.
 */
public class MySqlKMeansDataFilter extends MySqlStorage {

  /** Selects ids of users with more than N preference rows; N is appended to the query. */
  private static final String GET_USER = "select user_id from (SELECT user_id, COUNT(user_id) AS nums FROM taste_preferences GROUP BY user_id ORDER BY nums DESC) as ns where nums >";

  /** Selects the item ids of one user; the user id is appended to the query. */
  private static final String GET_USER_ITEMS_BASE = "SELECT item_id FROM taste_preferences WHERE user_id = ";

  /** Selects every distinct item id present in {@code taste_preferences}. */
  private static final String GET_ALL_RATED_ITEMS = "SELECT DISTINCT(item_id) FROM taste_preferences";

  private static final Logger log = LoggerFactory.getLogger(MySqlKMeansDataFilter.class);

  /** Threshold appended to {@link #GET_USER}: a user needs more rows than this to be selected. */
  private final int minNumUserRatings;

  /**
   * Creates the filter and reads the minimum-ratings threshold from the configuration.
   *
   * @param storageConf storage configuration; also supplies {@code getMinNumUserRatings()}
   * @param label       label passed through to {@link MySqlStorage}
   * @param dataSource  data source passed through to {@link MySqlStorage}
   */
  public MySqlKMeansDataFilter(StorageConfiguration storageConf, String label, BasicDataSource dataSource) {
    super(storageConf, label, dataSource);
    this.minNumUserRatings = storageConf.getMinNumUserRatings();
  }

  /**
   * Loads the data for all users that have more than the configured minimum
   * number of preference rows.
   *
   * @return data object holding the selected user ids, their item ids, the union
   *         of those item ids, and {@code dim} set to the number of selected users
   */
  public StreamingKMeansDataObject getData() {
    FastIDSet userids = this.getQuery(GET_USER + String.valueOf(minNumUserRatings));
    return buildDataObject(userids, userids.size());
  }

  /**
   * Data object bundling the results of one load: the user ids, the item ids
   * per user, the union of all those item ids, and the dimension value.
   */
  public class StreamingKMeansDataObject {

    private final FastIDSet userids;
    private final HashMap<Long, FastIDSet> userItemIds;
    private final int dim;
    private final FastIDSet allItems;

    /**
     * @param allItems    union of the item ids of all users in {@code userids}
     * @param userids     ids of the users the data was loaded for
     * @param userItemIds item ids keyed by user id
     * @param dim         dimension value stored alongside the data
     */
    public StreamingKMeansDataObject(FastIDSet allItems, FastIDSet userids, HashMap<Long, FastIDSet> userItemIds, int dim) {
      this.allItems = allItems;
      this.userids = userids;
      this.userItemIds = userItemIds;
      this.dim = dim;
    }

    /** @return the user ids this object was built for */
    public FastIDSet getUserIDs() {
      return this.userids;
    }

    /** @return the item ids keyed by user id */
    public HashMap<Long, FastIDSet> getUserItemIDs() {
      return this.userItemIds;
    }

    /** @return the dimension value given at construction */
    public int getDim() {
      return this.dim;
    }

    /** @return the union of all item ids of the contained users */
    public FastIDSet getAllItems() {
      return this.allItems;
    }
  }

  /**
   * Executes the given query and collects the first column of every row as a long id.
   *
   * <p>A {@link SQLException} is logged and not rethrown; in that case the ids
   * collected up to the failure (possibly none) are returned.
   *
   * @param query complete SQL select statement whose first column is a numeric id
   * @return the set of ids read from the result; never {@code null}
   */
  public FastIDSet getQuery(String query) {
    Connection conn = null;
    PreparedStatement stmt = null;
    ResultSet rs = null;
    FastIDSet candidates = new FastIDSet();
    try {
      conn = dataSource.getConnection();
      stmt = conn.prepareStatement(query, ResultSet.TYPE_FORWARD_ONLY,
          ResultSet.CONCUR_READ_ONLY);
      stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
      stmt.setFetchSize(1000);
      rs = stmt.executeQuery();
      while (rs.next()) {
        candidates.add(rs.getLong(1));
      }
    } catch (SQLException e) {
      // Best effort: report through the class logger and return what was read so far.
      log.error("Failed to execute query [" + query + "]", e);
    } finally {
      // Release in reverse order of acquisition; the result set is closed explicitly
      // instead of relying on the statement to do it.
      IOUtils.quietClose(rs);
      IOUtils.quietClose(stmt);
      IOUtils.quietClose(conn);
    }
    return candidates;
  }

  /**
   * Loads the item data for an explicitly given set of users.
   *
   * @param userIDs ids of the users to load items for
   * @param dim     dimension value to store in the returned object
   * @return data object holding {@code userIDs}, their item ids, the union of
   *         those item ids, and {@code dim}
   */
  public StreamingKMeansDataObject getNewData(FastIDSet userIDs, int dim) {
    return buildDataObject(userIDs, dim);
  }

  /**
   * Fetches the item ids of every given user (one query per user), logs a
   * summary at info level, and wraps everything in a data object.
   */
  private StreamingKMeansDataObject buildDataObject(FastIDSet userIDs, int dim) {
    HashMap<Long, FastIDSet> userItemIds = new HashMap<Long, FastIDSet>();
    FastIDSet allItems = new FastIDSet();
    for (long userid : userIDs.toArray()) {
      FastIDSet userItems = getQuery(GET_USER_ITEMS_BASE + String.valueOf(userid));
      allItems.addAll(userItems);
      userItemIds.put(userid, userItems);
    }
    // The guard matters: the summary needs an extra query over the whole table.
    if (log.isInfoEnabled()) {
      int numAllRatedItems = this.getQuery(GET_ALL_RATED_ITEMS).size();
      int numAllConcideredItems = allItems.size();
      log.info("Creating [{}] Vectors with [{}] dimensions out of [{}] items.",
          new Object[] {numAllConcideredItems, dim, numAllRatedItems});
    }
    return new StreamingKMeansDataObject(allItems, userIDs, userItemIds, dim);
  }
}