/*
TagRecommender:
A framework to implement and evaluate algorithms for the recommendation
of tags.
Copyright (C) 2013 Dominik Kowald, Emanuel Lacic
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package itemrecommendations;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import com.google.common.primitives.Ints;
import common.Bookmark;
import common.DoubleMapComparator;
import common.Features;
import common.Similarity;
import common.Utilities;
import file.BookmarkReader;
import file.PredictionFileWriter;
/**
* Class for calculating recommendations based on Zheng Tag - Time approach
* @author elacic
*
*/
public class ZhengCalculator {
public static int MAX_NEIGHBORS = 20;
private static final double LAMBDA = 0.5;
private List<Bookmark> bookmarks;
private Similarity similarity;
private List<Bookmark> trainList;
private ZhengApproach zhengApproach;
private Map<Integer, Double> allUsersSimilarities;
private List<Map<Integer, Double>> userResourcesTagWeight;
private List<Map<Integer, Double>> userResourcesTimeWeight;
private List<Map<Integer, Double>> userResourcesTagTimeWeight;
/**
* Constructor with needed data for calculating recommendations
* @param reader contains train data
* @param sim measure which defines how to calculate similarity between two users
* @param trainSize size of the train set
*/
public ZhengCalculator(BookmarkReader reader, Similarity sim, int trainSize) {
bookmarks = reader.getBookmarks();
similarity = sim;
trainList = bookmarks.subList(0, trainSize);
zhengApproach = new ZhengApproach(trainList);
System.out.println("Constructed Zheng approach class");
allUsersSimilarities = Utilities.getAllEntities(trainList, false);
userResourcesTagWeight = new ArrayList<Map<Integer, Double>>();
userResourcesTimeWeight = new ArrayList<Map<Integer, Double>>();
userResourcesTagTimeWeight = new ArrayList<Map<Integer, Double>>();
fillZhengWeights();
System.out.println("Filled user - resource weights.");
}
public static List<Map<Integer, Double>> createTagTimeMapping(List<Bookmark> trainList){
List<Map<Integer, Double>> userResourcesTagTimeWeight = new ArrayList<Map<Integer, Double>>();
ZhengApproach zhengApproach = new ZhengApproach(trainList);
for (Bookmark data : trainList) {
int user = data.getUserID();
int resource = data.getResourceID();
Map<Integer, Double> resourceTagTimeWeights = null;
if (user >= userResourcesTagTimeWeight.size()) {
resourceTagTimeWeights = new LinkedHashMap<Integer, Double>();
userResourcesTagTimeWeight.add(resourceTagTimeWeights);
} else {
resourceTagTimeWeights = userResourcesTagTimeWeight.get(user);
}
Double tagTimeWeight = zhengApproach.getTagTimeWeight(user, resource, LAMBDA);
resourceTagTimeWeights.put(resource, tagTimeWeight);
}
return userResourcesTagTimeWeight;
}
public static Map<Integer, Double> createUserTagTimeMapping(int userID, List<Bookmark> trainList){
Map<Integer, Double> userResourcesTagTimeWeight = new LinkedHashMap<Integer, Double>();
ZhengApproach zhengApproach = new ZhengApproach(trainList);
for (Bookmark data : trainList) {
int user = data.getUserID();
int resource = data.getResourceID();
if (user == userID) {
Double tagTimeWeight = zhengApproach.getTagTimeWeight(user, resource, LAMBDA);
userResourcesTagTimeWeight.put(resource, tagTimeWeight);
}
}
return userResourcesTagTimeWeight;
}
/**
* Fills for every user a map which contains weights (based on tag, time and tag-time score) for his resources
*/
private void fillZhengWeights() {
for (Bookmark data : trainList) {
int user = data.getUserID();
int resource = data.getResourceID();
Map<Integer, Double> resourceTagWeights = null;
Map<Integer, Double> resourceTimeWeights = null;
Map<Integer, Double> resourceTagTimeWeights = null;
if (user >= userResourcesTagWeight.size()) {
resourceTagWeights = new LinkedHashMap<Integer, Double>();
resourceTimeWeights = new LinkedHashMap<Integer, Double>();
resourceTagTimeWeights = new LinkedHashMap<Integer, Double>();
userResourcesTagWeight.add(resourceTagWeights);
userResourcesTimeWeight.add(resourceTimeWeights);
userResourcesTagTimeWeight.add(resourceTagTimeWeights);
} else {
resourceTagWeights = userResourcesTagWeight.get(user);
resourceTimeWeights = userResourcesTimeWeight.get(user);
resourceTagTimeWeights = userResourcesTagTimeWeight.get(user);
}
Double tagWeight = zhengApproach.getTagWeight(user, resource);
Double timeWeight = zhengApproach.getTimeWeight(user, resource);
Double tagTimeWeight = zhengApproach.getTagTimeWeight(tagWeight, timeWeight, LAMBDA);
resourceTagWeights.put(resource, tagWeight);
resourceTimeWeights.put(resource, timeWeight);
resourceTagTimeWeights.put(resource, tagTimeWeight);
}
}
/**
* Calculates results based on the tag approach from Zheng
* @param userID id of the user to generate recommendations
* @param sorting if the map should be sorted by ranking score
* @return ranked map with recommended resources
*/
public Map<Integer, Double> getRankedResourcesListByTag(int userID, boolean sorting) {
return getRankedResourcesList(userID, userResourcesTagWeight, sorting);
}
/**
* Calculates results based on the time approach from Zheng
* @param userID id of the user to generate recommendations
* @param sorting if the map should be sorted by ranking score
* @return ranked map with recommended resources
*/
public Map<Integer, Double> getRankedResourcesListByTime(int userID, boolean sorting) {
return getRankedResourcesList(userID, userResourcesTimeWeight, sorting);
}
/**
* Calculates results based on the tag - time approach from Zheng
* @param userID id of the user to generate recommendations
* @param sorting if the map should be sorted by ranking score
* @return ranked map with recommended resources
*/
public Map<Integer, Double> getRankedResourcesListByTagTime(int userID, boolean sorting) {
return getRankedResourcesList(userID, userResourcesTagTimeWeight, sorting);
}
/**
* Calculates resources to recommend for a user
* @param userID user to recommend resources
* @param userResourcesWeights list containing resource weight-map for every user (index of the list is the id of the user)
* @param scoreCalculator calculator which calculates the weight-score for a user-id and a resource-id
* @param sorting should the returned recommend resource map be sorted based on the resource score
* @return ranked map with recommended resources
*/
private Map<Integer, Double> getRankedResourcesList(
int userID,
List<Map<Integer, Double>> userResourcesWeights,
boolean sorting) {
Map<Integer, Double> candidateResources = new LinkedHashMap<Integer, Double>();
// get candidates
int i = 0;
Map<Integer, Double> sortedNeighbors =
Utilities.getNeighbors(userID, -1, allUsersSimilarities, userResourcesWeights, trainList, similarity, true);
Double userSimiliaritySum = 0.0;
for (Map.Entry<Integer, Double> neighbor : sortedNeighbors.entrySet()) {
if (i++ > MAX_NEIGHBORS) {
break;
}
// System.out.println("Neighbour: " + neighbor);
double userSimVal = neighbor.getValue();
userSimiliaritySum += userSimVal;
if (userSimVal != 0.0) {
List<Integer> resources = Bookmark.getResourcesFromUser(trainList, neighbor.getKey());
for (Integer resID : resources) {
if (! userResourcesTagWeight.get(userID).containsKey(resID)) {
Double resourceScore = candidateResources.get(resID);
Double resourceWeight = userResourcesWeights.get(neighbor.getKey()).get(resID) * userSimVal;
// scoreCalculator.getScore(neighbor.getKey(), resID) * userSimVal;
resourceScore = (resourceScore != null) ? (resourceScore + resourceWeight) : resourceWeight;
candidateResources.put(resID, resourceScore);
}
}
}
}
for (Integer resource : candidateResources.keySet()) {
double resourceScore = candidateResources.get(resource) / Math.abs(userSimiliaritySum);
candidateResources.put(resource, resourceScore);
}
if (sorting) {
// return the sorted resources
Map<Integer, Double> sortedRankedResources = new TreeMap<Integer, Double>(new DoubleMapComparator(candidateResources));
sortedRankedResources.putAll(candidateResources);
return sortedRankedResources;
} else {
return candidateResources;
}
}
// Statics -----------------------------------------------------------------------------------------------------------------------------------------------------------
private static List<Map<Integer, Double>> startZhengTagCreationForResourcesPrediction(
BookmarkReader reader, int trainSize, ZhengCalculator calculator) {
List<Map<Integer, Double>> results = new ArrayList<Map<Integer, Double>>();
for (Integer userID : reader.getUniqueUserListFromTestSet(trainSize)) {
results.add(calculator.getRankedResourcesListByTag(userID, true));
}
return results;
}
private static List<Map<Integer, Double>> startZhengTimeCreationForResourcesPrediction(
BookmarkReader reader, int trainSize, ZhengCalculator calculator) {
List<Map<Integer, Double>> results = new ArrayList<Map<Integer, Double>>();
for (Integer userID : reader.getUniqueUserListFromTestSet(trainSize)) {
results.add(calculator.getRankedResourcesListByTime(userID, true));
}
return results;
}
private static List<Map<Integer, Double>> startZhengTagTimeCreationForResourcesPrediction(
BookmarkReader reader, int trainSize, ZhengCalculator calculator) {
List<Map<Integer, Double>> results = new ArrayList<Map<Integer, Double>>();
for (Integer userID : reader.getUniqueUserListFromTestSet(trainSize)) {
results.add(calculator.getRankedResourcesListByTagTime(userID, true));
}
return results;
}
public static BookmarkReader predictSample(String filename, int trainSize) {
// read input
//filename += "_res";
BookmarkReader reader = new BookmarkReader(trainSize, false);
reader.readFile(filename);
ZhengCalculator calculator = new ZhengCalculator(reader, Similarity.COSINE, trainSize);
// // get recommendations
// List<Map<Integer, Double>> tagValues = startZhengTagCreationForResourcesPrediction(reader, trainSize, calculator);
// // write results
// writeResults(filename, trainSize, "_zheng_tag", reader, tagValues);
//
// // get recommendations
// List<Map<Integer, Double>> timeValues = startZhengTimeCreationForResourcesPrediction(reader, trainSize, calculator);
// // write results
// writeResults(filename, trainSize, "_zheng_time", reader, timeValues);
// get recommendations
List<Map<Integer, Double>> tagTimeValues = startZhengTagTimeCreationForResourcesPrediction(reader, trainSize, calculator);
// write results
writeResults(filename, trainSize, "_zheng_tagtime", reader, tagTimeValues);
return reader;
}
private static void writeResults(String filename, int trainSize, String suffix,
BookmarkReader reader, List<Map<Integer, Double>> zhengValues) {
List<int[]> predictionValues = new ArrayList<int[]>();
for (int i = 0; i < zhengValues.size(); i++) {
Map<Integer, Double> modelVal = zhengValues.get(i);
predictionValues.add(Ints.toArray(modelVal.keySet()));
}
PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues);
writer.writeResourcePredictionsToFile(filename + suffix, trainSize, MAX_NEIGHBORS);
}
}