/*
TagRecommender:
A framework to implement and evaluate algorithms for the recommendation
of tags.
Copyright (C) 2013 Dominik Kowald, Emanuel Lacic
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package itemrecommendations;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import com.google.common.primitives.Ints;
import common.Bookmark;
import common.DoubleMapComparator;
import common.Features;
import common.Similarity;
import common.Utilities;
import file.BookmarkReader;
import file.PredictionFileWriter;
/**
 * Calculates resource recommendations for users with the Huang approach.
 * <p>
 * Neighbours of the target user are determined from per-user tag-weight profiles,
 * the neighbours' resources become candidates, and every candidate is finally scored
 * by combining its tag-based similarity to the target user's own resources with the
 * scaled tag FRD values delivered by {@link HuangApproach}.
 *
 * @author elacic
 */
public class HuangCalculator {

    /**
     * Maximum number of neighbours whose resources are collected as candidates.
     * The same value is also handed to the prediction writer as its last argument
     * (presumably the number of predictions to write - confirm against PredictionFileWriter).
     */
    public static int MAX_NEIGHBORS = 20;

    private final Similarity similarity;
    private final List<Bookmark> trainList;
    private final HuangApproach huangApproach;
    /** Map handed to the neighbourhood search; obtained from {@code Utilities.getAllEntities(trainList, false)}. */
    private final Map<Integer, Double> allUsersSimilarities;
    /** resource id -> (tag id -> item-tag weight) */
    private final Map<Integer, Map<Integer, Double>> resourceTagWeights;
    /** index = user id, value = (tag id -> user-tag weight) */
    private final List<Map<Integer, Double>> userTagWeights;

    /**
     * Constructor with needed data for calculating recommendations
     * @param reader contains train data
     * @param sim measure which defines how to calculate similarity between two users
     * @param trainSize size of the train set; the first {@code trainSize} bookmarks of the reader are used
     */
    public HuangCalculator(BookmarkReader reader, Similarity sim, int trainSize) {
        similarity = sim;
        trainList = reader.getBookmarks().subList(0, trainSize);
        huangApproach = new HuangApproach(trainList);
        System.out.println("Constructed Huang approach class");
        allUsersSimilarities = Utilities.getAllEntities(trainList, false);
        resourceTagWeights = new HashMap<Integer, Map<Integer, Double>>();
        userTagWeights = new ArrayList<Map<Integer, Double>>();
        fillTagWeights();
        System.out.println("Filled user - resource weights.");
    }

    /**
     * Walks over the training bookmarks and fills, for every user and for every resource,
     * a map from tag id to the weight reported by the Huang approach for that
     * (user, tag) respectively (resource, tag) pair. A weight is computed only once per pair.
     */
    private void fillTagWeights() {
        for (Bookmark data : trainList) {
            int user = data.getUserID();
            int resource = data.getResourceID();
            List<Integer> tags = data.getTags();

            // The list is addressed by user id. Pad it so that the index always equals the id,
            // even if ids do not show up in strictly contiguous order.
            while (userTagWeights.size() <= user) {
                userTagWeights.add(new LinkedHashMap<Integer, Double>());
            }
            Map<Integer, Double> tagUserWeights = userTagWeights.get(user);

            Map<Integer, Double> tagResourceWeights = resourceTagWeights.get(resource);
            if (tagResourceWeights == null) {
                tagResourceWeights = new HashMap<Integer, Double>();
                resourceTagWeights.put(resource, tagResourceWeights);
            }

            for (Integer tag : tags) {
                if (!tagUserWeights.containsKey(tag)) {
                    tagUserWeights.put(tag, huangApproach.getUserTagWeight(user, tag));
                }
                if (!tagResourceWeights.containsKey(tag)) {
                    tagResourceWeights.put(tag, huangApproach.getItemTagWeight(resource, tag));
                }
            }
        }
    }

    /**
     * Calculates recommended resources for a user, using the user tag-weight profiles
     * for the neighbourhood search.
     * @param userID user to recommend resources
     * @param sorting should the returned map be sorted based on the resource score
     * @return map from resource id to score
     */
    public Map<Integer, Double> getRankedResourcesListByUserWeight(int userID, boolean sorting) {
        return getRankedResourcesList(userID, userTagWeights, sorting);
    }

    /**
     * Calculates resources to recommend for a user
     * @param userID user to recommend resources
     * @param userResourcesWeights list containing a weight-map for every user (index of the list is the id of the user);
     *        it is passed on to the neighbourhood search
     * @param sorting should the returned recommend resource map be sorted based on the resource score
     * @return map from recommended resource id to its score; resources the user already has are excluded
     */
    private Map<Integer, Double> getRankedResourcesList(
            int userID,
            List<Map<Integer, Double>> userResourcesWeights,
            boolean sorting) {
        Map<Integer, Double> candidateResources = new LinkedHashMap<Integer, Double>();

        // find similar users
        Map<Integer, Double> sortedNeighbors =
                Utilities.getNeighbors(userID, -1, allUsersSimilarities, userResourcesWeights, trainList, similarity, true);
        Set<Integer> targetUserResources = huangApproach.getUserResourceTagMaping().get(userID).keySet();

        // extract candidate resources from at most MAX_NEIGHBORS neighbours
        int visitedNeighbors = 0;
        for (Map.Entry<Integer, Double> neighbor : sortedNeighbors.entrySet()) {
            if (visitedNeighbors++ >= MAX_NEIGHBORS) {
                break;
            }
            double userSimVal = neighbor.getValue();
            if (userSimVal == 0.0) {
                // a neighbour without any similarity contributes no candidates
                continue;
            }
            List<Integer> resources = Bookmark.getResourcesFromUser(trainList, neighbor.getKey());
            for (Integer resID : resources) {
                if (targetUserResources.contains(resID)) {
                    continue;
                }
                // provisional score (summed neighbour similarity); it only marks the resource as a
                // candidate and is overwritten by the final score in the ranking loop below
                Double resourceScore = candidateResources.get(resID);
                candidateResources.put(resID, (resourceScore != null) ? (resourceScore + userSimVal) : userSimVal);
            }
        }

        // cache: target user's resource -> maximal scaled tag FRD over the tags of that resource
        Map<Integer, Double> resourceMaxScaledTagFRD = new HashMap<Integer, Double>();

        // rank candidate resources with item similarity and scaled tag FRD
        for (Map.Entry<Integer, Double> candidateRes : candidateResources.entrySet()) {
            Map<Integer, Double> candidateResourceTags = resourceTagWeights.get(candidateRes.getKey());
            double candidateSimilarity = 0.0;
            for (Integer targetUserRes : targetUserResources) {
                // the FRD value of a target resource is computed lazily, the first time it is needed
                if (!resourceMaxScaledTagFRD.containsKey(targetUserRes)) {
                    fillMaxScaledTagFRD(userID, resourceMaxScaledTagFRD, targetUserRes);
                }
                Map<Integer, Double> targetResourceTags = resourceTagWeights.get(targetUserRes);
                double resSimVal = Utilities.getCosineFloatSim(targetResourceTags, candidateResourceTags);
                candidateSimilarity += (HuangApproach.WEIGHT * resSimVal)
                        + ((1 - HuangApproach.WEIGHT) * resourceMaxScaledTagFRD.get(targetUserRes));
            }
            // overwrite through the entry instead of calling put() on the map that is being iterated
            candidateRes.setValue(candidateSimilarity);
        }

        if (!sorting) {
            return candidateResources;
        }
        // return the resources ordered by the project's DoubleMapComparator, which is given the score map
        Map<Integer, Double> sortedRankedResources =
                new TreeMap<Integer, Double>(new DoubleMapComparator(candidateResources));
        sortedRankedResources.putAll(candidateResources);
        return sortedRankedResources;
    }

    /**
     * Determines the largest scaled tag FRD value over all tags the user assigned to the
     * given resource and stores it in the passed cache map.
     * If the resource has no tags (or no usable value), {@code 0.0} is stored so that the
     * FRD part adds nothing to the score.
     * @param userID target user
     * @param resourceMaxScaledTagFRD cache to store the result in (resource id -> maximal scaled tag FRD)
     * @param targetUserRes resource of the target user
     */
    private void fillMaxScaledTagFRD(int userID, Map<Integer, Double> resourceMaxScaledTagFRD, Integer targetUserRes) {
        List<Integer> targetResTags = huangApproach.getUserResourceTagMaping().get(userID).get(targetUserRes);

        // Double.MIN_VALUE is the smallest POSITIVE double and therefore no valid seed for a maximum
        double maxScaledTagFRD = Double.NEGATIVE_INFINITY;
        if (targetResTags != null) {
            for (Integer tag : targetResTags) {
                double scaledTagFRD = huangApproach.getScaledTagFRD(userID, tag);
                if (scaledTagFRD > maxScaledTagFRD) {
                    maxScaledTagFRD = scaledTagFRD;
                }
            }
        }
        if (maxScaledTagFRD == Double.NEGATIVE_INFINITY) {
            maxScaledTagFRD = 0.0;
        }
        resourceMaxScaledTagFRD.put(targetUserRes, maxScaledTagFRD);
    }

    // Statics -----------------------------------------------------------------------------------------------------------------------------------------------------------

    /**
     * Computes the sorted recommendation map for every user the reader reports for the test set.
     * @param reader source of the test users
     * @param trainSize size of the train set
     * @param calculator calculator used to rank the resources
     * @return one ranked resource map per test user, in the order the reader delivers the users
     */
    private static List<Map<Integer, Double>> startHuangUserProfile(BookmarkReader reader, int trainSize, HuangCalculator calculator) {
        List<Map<Integer, Double>> results = new ArrayList<Map<Integer, Double>>();
        for (Integer userID : reader.getUniqueUserListFromTestSet(trainSize)) {
            results.add(calculator.getRankedResourcesListByUserWeight(userID, true));
        }
        return results;
    }

    /**
     * Runs the complete pipeline: reads the sample, calculates recommendations for all test users
     * with cosine similarity and writes the predictions to {@code filename + "_huang_tag_user"}.
     * @param filename name of the sample to read
     * @param trainSize size of the train set
     * @return the reader that holds the read data
     */
    public static BookmarkReader predictSample(String filename, int trainSize) {
        // read input
        BookmarkReader reader = new BookmarkReader(trainSize, false);
        reader.readFile(filename);

        HuangCalculator calculator = new HuangCalculator(reader, Similarity.COSINE, trainSize);
        // get recommendations
        List<Map<Integer, Double>> rankedResources = startHuangUserProfile(reader, trainSize, calculator);
        // write results
        writeResults(filename, trainSize, "_huang_tag_user", reader, rankedResources);
        return reader;
    }

    /**
     * Converts the key sets (resource ids, in map iteration order) of the ranked maps into
     * int arrays and writes them with a {@link PredictionFileWriter}.
     * @param filename base name of the output
     * @param trainSize size of the train set
     * @param suffix appended to {@code filename} to build the output name
     * @param reader reader that holds the read data
     * @param rankedResources one ranked resource map per test user
     */
    private static void writeResults(String filename, int trainSize, String suffix,
            BookmarkReader reader, List<Map<Integer, Double>> rankedResources) {
        List<int[]> predictionValues = new ArrayList<int[]>();
        for (Map<Integer, Double> userRanking : rankedResources) {
            predictionValues.add(Ints.toArray(userRanking.keySet()));
        }
        PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues);
        writer.writeResourcePredictionsToFile(filename + suffix, trainSize, MAX_NEIGHBORS);
    }
}