package processing.hashtag.baseline;
import java.util.ArrayList;
import java.util.HashMap;
/**
* @author spujari
*
*/
public class UserTFIDFVectorCalculator{
/**
* Create a TFIDF vector for a user.
* @param user
* @return
*/
public static Vector createUserTFIDFVector(HashMap<String, HashMap<Integer, ArrayList<Long>>> userTagTimestamps,
HashMap<Integer, HashMap<String, Integer>> tagUserCount, String userName) {
Vector vector = new Vector();
for (Integer tag : userTagTimestamps.get(userName).keySet()) {
int totalNumberOfUsers = userTagTimestamps.size();
if (userTagTimestamps.get(userName).containsKey(tag)) {
int tagUsageCountOfUser = userTagTimestamps.get(userName).get(tag).size();
System.out.println(" number of times tag used by user >> " + tagUsageCountOfUser + " >> " + " user using the tags >> " + tagUserCount.get(tag).keySet().size() + " total Number of Users >> " + totalNumberOfUsers);
vector.getVector().put(tag,
getHashTagTFIDFValue(tagUsageCountOfUser, tagUserCount.get(tag).keySet().size(), totalNumberOfUsers));
}
}
return vector;
}
/**
* Get TFIDF value for a Hashtag.
* @param userName
* @param hashtag
* @return
*/
private static double getHashTagTFIDFValue(int tagUsageCountOfUser, int numberOfUsersOfTag, int numberOfUsersDataset) {
double tfIdf = 0d;
double idfScore = getHashtagIDF(numberOfUsersOfTag, numberOfUsersDataset);
System.out.println(" idfScore >> " + idfScore);
tfIdf = (double)tagUsageCountOfUser * idfScore;
System.out.println(" tfidf score >> " + tfIdf);
return tfIdf;
}
/**
* Get the IDF value of the hashtag.
* @param numberOfUsersOfTag
* @param numberOfUsers
* @return
*/
private static double getHashtagIDF(double numberOfUsersOfTag, double numberOfUsers) {
double idfUser = 0d;
if(numberOfUsersOfTag!=0){
idfUser = numberOfUsers / numberOfUsersOfTag;
}
if(idfUser != 0){
double logidfUser = Math.log(idfUser);
return logidfUser;
}else{
return idfUser;
}
}
}