package processing.hashtag.baseline;
import java.io.File;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.solr.common.util.Hash;
import common.DoubleMapComparator;
import common.Similarity;
import net.sf.javaml.utils.MathUtils;
/**
 * Scores candidate hashtags for a user by combining a pre-computed tweet-similarity
 * score with a personalised (network based) score and, in version 3, a temporal
 * decay factor that depends on the hashtag's entropy.
 *
 * <p>This tag recommendation is based on the method explained in the paper
 * "Long time no tweets, time aware personalised Hashtag Suggestion".
 *
 * @author spujari
 * @see <a href="http://link.springer.com/chapter/10.1007%2F978-3-319-16354-3_65#page-1">
 *      Long time no tweets, time aware personalised Hashtag Suggestion</a>
 */
public class ContentPersonalTemporalCalculator {

    /**
     * Divisor used to convert a timestamp difference into days.
     * NOTE(review): assumes timestamps are expressed in seconds - confirm against the data loader.
     */
    private static final double SECONDS_PER_DAY = 86400;

    /** Weight of the tweet-similarity score in {@link #getSimilarityScore}. */
    private static final double TEXT_WEIGHT = 0.5;

    /** Weight of the tweet-similarity score in {@link #getSimilarityScoreVersion2}. */
    private static final double TEXT_WEIGHT_VERSION2 = 0.6;

    /** Entropy below this value selects {@code eta_l} as decay rate, otherwise {@code eta_h}. */
    private static final double ENTROPY_THRESHOLD = 0.5;

    /** Constant added to the exponential decay whenever an entropy value is known for a hashtag. */
    private static final double TEMPORAL_OFFSET = 0.5;

    /** Maximum number of candidate hashtags considered by {@link #getSimilarityScoreVersion3}. */
    private static final int MAX_CANDIDATES = 1000;

    private final HashMap<String, HashMap<Integer, ArrayList<Long>>> userTagTimestamps;
    private final HashMap<String, ArrayList<String>> network;
    private final List<String> users;
    private final HashMap<Integer, HashSet<String>> tagUserMap;
    private final HashMap<Integer, ArrayList<Long>> tagTimestamps;
    private final Map<Integer, Map<Integer, Double>> resultMapTweetSimilarity;
    // Never assigned in this class: its initialisation was disabled, so
    // getSimilarityScoreVersion2 cannot currently be used.
    private PersonalisedTFIDFCalculator personalisedTFIDFCalculator;
    private final Map<Integer, Double> tagEntropyScore;
    private final int numberOfIntervals;
    private double eta_l;
    private double eta_h;

    /**
     * Builds the calculator and loads (or computes and stores) the hashtag entropy values.
     *
     * <p>The entropy map is read from
     * {@code ./data/results/<sampleDir>/<solrCore>_hashtag_entropy_<intervals>.ser} when that file
     * exists; otherwise it is computed from {@code userTagTimes} and serialised to that path.
     *
     * @param userTagTimes
     *            user -> (hashtag -> timestamps) map
     * @param network
     *            user mapped to the list of that user's friends
     * @param users
     *            {@link List} of users; the list index is the numeric user id used by the
     *            scoring methods
     * @param tags
     *            not used by this constructor
     * @param solrUrl
     *            not used by this constructor
     * @param solrCore
     *            core name, used only to build the entropy file name
     * @param sampleDir
     *            sample directory name, used only to build the entropy file path
     * @param resultMapTweetSimilarity
     *            user id -> (hashtag -> tweet-similarity score) map
     */
    public ContentPersonalTemporalCalculator(HashMap<String, HashMap<Integer, ArrayList<Long>>> userTagTimes,
            HashMap<String, ArrayList<String>> network, List<String> users, List<String> tags, String solrUrl,
            String solrCore, String sampleDir, Map<Integer, Map<Integer, Double>> resultMapTweetSimilarity) {
        this.userTagTimestamps = userTagTimes;
        this.network = network;
        this.users = users;
        this.tagUserMap = createUserHashtagVector(userTagTimes);
        this.resultMapTweetSimilarity = resultMapTweetSimilarity;
        this.tagTimestamps = getTagTimestamps(userTagTimes);
        this.numberOfIntervals = 10000;

        String serialFilePath = "./data/results/" + sampleDir + "/" + solrCore + "_hashtag_entropy_"
                + numberOfIntervals + ".ser";
        if (new File(serialFilePath).exists()) {
            System.out.println("serializer file exists .. the entropy values will be loaded from the serialized file");
            this.tagEntropyScore = HashtagEntropyCalculator.deSerializeHashtagEntropy(serialFilePath);
        } else {
            System.out.println("serializer file not exists ... entropy will be calculated and stored into the serialized file");
            this.tagEntropyScore = HashtagEntropyCalculator.computeAllHashtagEntropyMap(userTagTimestamps,
                    this.numberOfIntervals);
            HashtagEntropyCalculator.serializeHashtagEntropy(this.tagEntropyScore, serialFilePath);
        }
    }

    /**
     * Merges the per-user timestamps of every hashtag into one list per hashtag.
     *
     * @param userTagTimestamps
     *            user -> (hashtag -> timestamps) map
     * @return hashtag -> timestamps of all users, each list sorted in descending order
     */
    public HashMap<Integer, ArrayList<Long>> getTagTimestamps(
            HashMap<String, HashMap<Integer, ArrayList<Long>>> userTagTimestamps) {
        HashMap<Integer, ArrayList<Long>> merged = new HashMap<Integer, ArrayList<Long>>();
        for (HashMap<Integer, ArrayList<Long>> tagsOfUser : userTagTimestamps.values()) {
            for (Map.Entry<Integer, ArrayList<Long>> tagEntry : tagsOfUser.entrySet()) {
                ArrayList<Long> allTimestamps = merged.get(tagEntry.getKey());
                if (allTimestamps == null) {
                    allTimestamps = new ArrayList<Long>();
                    merged.put(tagEntry.getKey(), allTimestamps);
                }
                allTimestamps.addAll(tagEntry.getValue());
            }
        }
        // Newest first, so that getLastUsedDiffInDays can stop at the first earlier timestamp.
        for (ArrayList<Long> allTimestamps : merged.values()) {
            Collections.sort(allTimestamps, Collections.reverseOrder());
        }
        return merged;
    }

    /**
     * Get similarity score based on a hybrid of text based similarity and personalised
     * similarity. Every hashtag known from the user-tag data receives a score; the scores are
     * normalised to sum to one and then passed through {@link #getSortedMap(Map)}.
     *
     * @param userId
     *            index into the user list of the user for whom hashtags are recommended
     * @param time
     *            time of the tweet; currently not used by this variant
     * @param sort
     *            currently ignored, the result is always sorted
     * @return {@link Map} of hashtag and similarity score
     */
    public Map<Integer, Double> getSimilarityScore(int userId, long time, boolean sort) {
        String user = users.get(userId);
        Map<Integer, Double> tweetScores = tweetSimilarityScoresFor(userId);
        HashMap<Integer, Double> resultMap = new HashMap<Integer, Double>();
        for (Integer hashTag : tagUserMap.keySet()) {
            resultMap.put(hashTag, getHybridTextPersonalisedScore(user, hashTag, tweetScores));
        }
        return getSortedMap(getNormalisedMap(resultMap));
    }

    /**
     * Get similarity score based on a hybrid of text based similarity and the hashtag scores
     * supplied by the {@code PersonalisedTFIDFCalculator}. Both scores are combined linearly
     * with weight 0.6 on the text based score; only hashtags with a positive combined score
     * are kept. The result is normalised and sorted.
     *
     * @param userId
     *            index into the user list of the user for whom hashtags are recommended
     * @param time
     *            currently not used by this variant
     * @param sort
     *            currently ignored, the result is always sorted
     * @return {@link Map} of hashtag and similarity score
     * @throws IllegalStateException
     *             if the personalised TFIDF calculator has not been initialised
     */
    public Map<Integer, Double> getSimilarityScoreVersion2(int userId, long time, boolean sort) {
        System.out.println("running inside version2");
        if (personalisedTFIDFCalculator == null) {
            // Fail with an explicit message instead of an unexplained NullPointerException.
            throw new IllegalStateException("personalisedTFIDFCalculator has not been initialised; "
                    + "getSimilarityScoreVersion2 cannot compute the user similarity scores");
        }
        String user = users.get(userId);
        // Tweet similarity scores of this user, one per hashtag.
        Map<Integer, Double> tweetScores = tweetSimilarityScoresFor(userId);
        // User similarity scores of this user, one per hashtag.
        Map<Integer, Double> userUserScores = personalisedTFIDFCalculator.getHashTagScoreResultMap(user,
                network.get(user), personalisedTFIDFCalculator.getAllUserTFIDFVector());

        HashMap<Integer, Double> resultMap = new HashMap<Integer, Double>();
        for (Integer hashTag : tagUserMap.keySet()) {
            // Hybrid linear combination of the two approaches; a missing score counts as zero.
            double hashtagScore = combineLinear(scoreOrZero(tweetScores, hashTag),
                    scoreOrZero(userUserScores, hashTag), TEXT_WEIGHT_VERSION2);
            if (hashtagScore > 0) {
                resultMap.put(hashTag, hashtagScore);
            }
        }
        return getSortedMap(getNormalisedMap(resultMap));
    }

    /**
     * Get similarity score based on the tweet-similarity score multiplied by a temporal factor.
     * Only the (at most) 1000 highest scored hashtags of the user are considered. For hashtags
     * with a known entropy value the factor is {@code exp(-eta * days) + 0.5}, where
     * {@code eta} is {@code eta_l} for entropy below 0.5 and {@code eta_h} otherwise, and
     * {@code days} comes from {@link #getLastUsedDiffInDays(long, int)}. Hashtags without an
     * entropy value keep their score unchanged. The result is sorted but not normalised.
     *
     * @param userId
     *            id of the user, used as key into the tweet-similarity result map
     * @param time
     *            time of the tweet, used for the temporal factor
     * @param sort
     *            currently ignored, the result is always sorted
     * @return {@link Map} of hashtag and similarity score
     */
    public Map<Integer, Double> getSimilarityScoreVersion3(int userId, long time, boolean sort) {
        System.out.println("running inside version3");
        System.out.println(" ContentePersonalTemporalCalculator >> eta_h " + eta_h + " >> eta_l >> " + eta_l);
        Map<Integer, Double> candidates = getTopItems(tweetSimilarityScoresFor(userId), MAX_CANDIDATES);
        HashMap<Integer, Double> resultMap = new HashMap<Integer, Double>();
        for (Map.Entry<Integer, Double> candidate : candidates.entrySet()) {
            int hashTag = candidate.getKey();
            double tweetSimilarity = candidate.getValue();
            resultMap.put(hashTag, tweetSimilarity * getTemporalFactor(hashTag, time));
        }
        return getSortedMap(resultMap);
    }

    /**
     * Looks up the tweet-similarity scores of one user.
     *
     * @param userId
     *            key into the tweet-similarity result map
     * @return the scores of that user, or an empty map when none are available
     */
    private Map<Integer, Double> tweetSimilarityScoresFor(int userId) {
        Map<Integer, Double> scores = null;
        if (resultMapTweetSimilarity != null) {
            scores = resultMapTweetSimilarity.get(userId);
        }
        if (scores == null) {
            return Collections.<Integer, Double>emptyMap();
        }
        return scores;
    }

    /**
     * Reads a score from a map, treating a missing map, key or value as zero.
     *
     * @param scores
     *            hashtag -> score map, may be {@code null}
     * @param hashTag
     *            hashtag to look up
     * @return the stored score, or {@code 0.0} when there is none
     */
    private double scoreOrZero(Map<Integer, Double> scores, int hashTag) {
        Double score = (scores == null) ? null : scores.get(hashTag);
        return (score == null) ? 0.0 : score;
    }

    /**
     * Computes the temporal weighting of one hashtag for
     * {@link #getSimilarityScoreVersion3(int, long, boolean)}.
     *
     * @param hashTag
     *            hashtag to weight
     * @param time
     *            time of the tweet
     * @return {@code 1.0} when no entropy is known for the hashtag, otherwise
     *         {@code exp(-eta * days) + 0.5}
     */
    private double getTemporalFactor(int hashTag, long time) {
        Double entropyScore = tagEntropyScore.get(hashTag);
        if (entropyScore == null) {
            return 1.0;
        }
        double decayRate = (entropyScore < ENTROPY_THRESHOLD) ? eta_l : eta_h;
        double power = -1 * decayRate * getLastUsedDiffInDays(time, hashTag);
        return Math.exp(power) + TEMPORAL_OFFSET;
    }

    /**
     * Time between {@code time} and the most recent use of the hashtag strictly before
     * {@code time}, expressed in days.
     *
     * @param time
     *            reference time
     * @param hashTag
     *            hashtag to look up
     * @return the difference divided by 86400, or {@code 0} when the hashtag is unknown or has
     *         no timestamp before {@code time}
     */
    private double getLastUsedDiffInDays(long time, int hashTag) {
        ArrayList<Long> timestampsNewestFirst = this.tagTimestamps.get(hashTag);
        if (timestampsNewestFirst == null) {
            // Candidate hashtags come from the tweet-similarity map and need not be present here.
            return 0.0;
        }
        for (Long timestamp : timestampsNewestFirst) {
            if (time > timestamp) {
                // The list is sorted descending, so the first earlier timestamp is the latest one.
                return (time - timestamp) / SECONDS_PER_DAY;
            }
        }
        return 0.0;
    }

    /**
     * Create a map of hashtag to user set from a userTagTimestamps map.
     *
     * @param userTagTimes
     *            user -> (hashtag -> timestamps) map
     * @return hashtag -> set of all users that have an entry for that hashtag
     */
    private HashMap<Integer, HashSet<String>> createUserHashtagVector(
            HashMap<String, HashMap<Integer, ArrayList<Long>>> userTagTimes) {
        HashMap<Integer, HashSet<String>> usersByTag = new HashMap<Integer, HashSet<String>>();
        for (Map.Entry<String, HashMap<Integer, ArrayList<Long>>> userEntry : userTagTimes.entrySet()) {
            String user = userEntry.getKey();
            for (Integer tag : userEntry.getValue().keySet()) {
                HashSet<String> tagUsers = usersByTag.get(tag);
                if (tagUsers == null) {
                    tagUsers = new HashSet<String>();
                    usersByTag.put(tag, tagUsers);
                }
                // Always add the user, including the one whose entry created the set.
                tagUsers.add(user);
            }
        }
        return usersByTag;
    }

    /**
     * Hybrid text / personalised score of one hashtag: an equally weighted linear combination
     * of the tweet-similarity score (zero when absent) and the personalised similarity.
     *
     * @param user
     *            user for whom the score is computed
     * @param hashTag
     *            hashtag to score
     * @param userTweetSimilarityHashtagScore
     *            hashtag -> tweet-similarity score of this user
     * @return the combined score
     */
    private double getHybridTextPersonalisedScore(String user, int hashTag,
            Map<Integer, Double> userTweetSimilarityHashtagScore) {
        double textScore = scoreOrZero(userTweetSimilarityHashtagScore, hashTag);
        double personalisedScore = PersonalisedSimilarityCalculator.getPersonalisedSimilarity(user,
                network.get(user), tagUserMap.get(hashTag), userTagTimestamps);
        return combineLinear(textScore, personalisedScore, TEXT_WEIGHT);
    }

    /**
     * Combine 2 values linearly with a constant factor.
     *
     * @param score1
     *            value weighted with {@code lambda}
     * @param score2
     *            value weighted with {@code 1 - lambda}
     * @param lambda
     *            constant factor for linear combination.
     * @return {@code lambda * score1 + (1 - lambda) * score2}
     */
    private double combineLinear(double score1, double score2, double lambda) {
        return lambda * score1 + (1 - lambda) * score2;
    }

    /**
     * Copies the map into a {@link TreeMap} whose key order is defined by a
     * {@code DoubleMapComparator} built on the given map, and prints the sorted map.
     *
     * @param resultMap
     *            hashtag -> score map
     * @return the sorted copy
     */
    private Map<Integer, Double> getSortedMap(Map<Integer, Double> resultMap) {
        Map<Integer, Double> sortedResultMap = new TreeMap<Integer, Double>(new DoubleMapComparator(resultMap));
        sortedResultMap.putAll(resultMap);
        System.out.println("resultMap >> " + sortedResultMap);
        return sortedResultMap;
    }

    /**
     * Normalize the values in the map in place so that they sum to one.
     *
     * <p>When the sum of all values is not positive (for example an empty map or all scores
     * zero) the map is returned unchanged rather than being filled with NaN.
     *
     * @param resultMap
     *            hashtag -> score map, modified in place; may be {@code null}
     * @return the same map instance
     */
    private Map<Integer, Double> getNormalisedMap(Map<Integer, Double> resultMap) {
        if (resultMap == null) {
            return null;
        }
        double total = 0.0;
        for (Double score : resultMap.values()) {
            if (score != null) {
                total += score;
            }
        }
        if (!(total > 0.0)) {
            // Also covers NaN; dividing would only produce NaN or infinite scores.
            return resultMap;
        }
        for (Map.Entry<Integer, Double> entry : resultMap.entrySet()) {
            if (entry.getValue() != null) {
                entry.setValue(entry.getValue() / total);
            }
        }
        return resultMap;
    }

    /**
     * Selects the highest scored entries of a map.
     *
     * @param completeMap
     *            hashtag -> score map; may be {@code null}
     * @param numberOfTopItems
     *            maximum number of entries to return
     * @return a new map with at most {@code numberOfTopItems} entries, chosen by descending
     *         score; entries with equal score are taken in the iteration order of the input
     */
    private Map<Integer, Double> getTopItems(Map<Integer, Double> completeMap, int numberOfTopItems) {
        Map<Integer, Double> chosenMap = new HashMap<Integer, Double>();
        if (completeMap == null || numberOfTopItems <= 0) {
            return chosenMap;
        }
        List<Map.Entry<Integer, Double>> ranked = new ArrayList<Map.Entry<Integer, Double>>(
                completeMap.entrySet());
        Collections.sort(ranked, new Comparator<Map.Entry<Integer, Double>>() {
            @Override
            public int compare(Map.Entry<Integer, Double> left, Map.Entry<Integer, Double> right) {
                // Arguments swapped to obtain descending order.
                return Double.compare(right.getValue(), left.getValue());
            }
        });
        int limit = Math.min(numberOfTopItems, ranked.size());
        for (int i = 0; i < limit; i++) {
            Map.Entry<Integer, Double> entry = ranked.get(i);
            chosenMap.put(entry.getKey(), entry.getValue());
        }
        return chosenMap;
    }

    /**
     * @return decay rate used for hashtags whose entropy is below 0.5
     */
    public double getEta_l() {
        return eta_l;
    }

    /**
     * @param eta_l
     *            decay rate used for hashtags whose entropy is below 0.5
     */
    public void setEta_l(double eta_l) {
        this.eta_l = eta_l;
    }

    /**
     * @return decay rate used for hashtags whose entropy is 0.5 or above
     */
    public double getEta_h() {
        return eta_h;
    }

    /**
     * @param eta_h
     *            decay rate used for hashtags whose entropy is 0.5 or above
     */
    public void setEta_h(double eta_h) {
        this.eta_h = eta_h;
    }
}