/*
TagRecommender:
A framework to implement and evaluate algorithms for the recommendation
of tags.
Copyright (C) 2013 Dominik Kowald
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package itemrecommendations;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;
import com.google.common.primitives.Ints;
import common.DoubleMapComparator;
import common.Features;
import common.MemoryThread;
import common.PerformanceMeasurement;
import common.Similarity;
import common.Bookmark;
import common.Utilities;
import file.PredictionFileWriter;
import file.BookmarkReader;
/**
 * Ranks resources (or users) for a target user/resource with user-based and/or
 * resource-based collaborative filtering over the bookmarks held by a
 * {@link BookmarkReader}.
 *
 * The bookmark list is split at {@code trainSize}: the first part is used to build
 * the profile maps, the rest is kept as the test list. The static entry points
 * ({@link #predictResources} / {@link #predictSample}) run a complete
 * read - predict - write cycle for a data file.
 *
 * NOTE(review): {@link #MAX_NEIGHBORS} and the private static time string are mutable
 * static state written by the static entry points, so concurrent runs in one JVM would
 * interfere with each other.
 */
public class CFResourceCalculator {

    /**
     * Cap applied when walking ranked neighbor / similar-resource lists.
     * Overwritten by {@link #predictResources}.
     */
    public static int MAX_NEIGHBORS = 20;

    //private final static double K1 = 1.2;
    //private final static double K3 = 1.2;
    //private final static double B = 0.8;

    private final BookmarkReader reader;
    private final boolean userBased;
    private final boolean resBased;
    //private double beta;
    Similarity sim;
    private final List<Bookmark> trainList;
    private final List<Bookmark> testList;
    // Profile maps; only populated for the modes/features selected in the constructor.
    private List<Map<Integer, Double>> userMaps;
    private Map<Integer, Double> allUsers;
    private List<Map<Integer, Double>> resMaps;
    private Map<Integer, Double> allResources;
    // Only referenced by the disabled candidate-set code in getRankedResourcesList.
    private CFResourceCalculator rankedResourceCalculator;

    /**
     * Builds the user and/or resource profile maps from the training part of the reader's bookmarks.
     *
     * @param reader      source of the bookmarks
     * @param trainSize   number of leading bookmarks that form the training list; the remainder is the test list
     * @param predictTags when {@code false}, user profiles are built even if {@code userBased} is {@code false}
     * @param userBased   enables the user-based part of the ranking
     * @param resBased    enables the resource-based part of the ranking
     * @param beta        currently unused (its only use is commented out)
     * @param sim         similarity measure handed to the {@link Utilities} helpers
     * @param features    which feature type the profile maps are built from; for a value not handled
     *                    below the corresponding profile map stays {@code null}
     */
    public CFResourceCalculator(BookmarkReader reader, int trainSize, boolean predictTags, boolean userBased, boolean resBased, int beta, Similarity sim, Features features) {
        this.reader = reader;
        this.userBased = userBased;
        this.resBased = resBased;
        //this.beta = (double)beta / 10.0;
        this.sim = sim;

        //this.trainList = this.reader.getUserLines().subList(0, predictTags ? trainSize : reader.getUserLines().size()); // TODO
        List<Bookmark> bookmarks = this.reader.getBookmarks();
        this.trainList = bookmarks.subList(0, trainSize);
        this.testList = bookmarks.subList(trainSize, bookmarks.size());

        if (this.userBased || !predictTags) {
            if (features == Features.ENTITIES) {
                this.userMaps = Utilities.getUsedEntities(this.trainList, false, null);
            } else if (features == Features.TOPICS) {
                this.userMaps = Utilities.getRelativeTopicMaps(this.trainList, false);//Utilities.getUserTopics(this.trainList);
            } else if (features == Features.TAGS) {
                this.userMaps = Utilities.getRelativeTagMaps(this.trainList, false);//Utilities.getUserMaps(this.trainList);
            } else if (features == Features.TAG_ENTITIES) {
                this.userMaps = Utilities.getUsedEntities(this.trainList, false, Utilities.getRelativeTagMaps(this.trainList, false));
            }
            this.allUsers = Utilities.getAllEntities(this.trainList, false);
        }

        if (this.resBased) {
            if (features == Features.ENTITIES) {
                this.resMaps = Utilities.getUsedEntities(this.trainList, true, null);
            } else if (features == Features.TOPICS) {
                this.resMaps = Utilities.getRelativeTopicMaps(this.trainList, true);//Utilities.getResTopics(this.trainList);
                // NOTE(review): this nested calculator is only used by commented-out code; kept to
                // preserve existing behavior and timing - confirm whether it can be dropped.
                this.rankedResourceCalculator = new CFResourceCalculator(this.reader, trainSize, false, true, false, 5, Similarity.COSINE, Features.ENTITIES);
            } else if (features == Features.TAGS) {
                this.resMaps = Utilities.getRelativeTagMaps(this.trainList, true);//Utilities.getResMaps(this.trainList);
            } else if (features == Features.TAG_ENTITIES) {
                this.resMaps = Utilities.getUsedEntities(this.trainList, true, Utilities.getRelativeTagMaps(this.trainList, true));
            }
            this.allResources = Utilities.getAllEntities(this.trainList, true);
        }
    }

    /**
     * Computes a score per resource (or per user when {@code recommUsers} is set).
     *
     * The user-based part adds, for every considered neighbor with non-zero similarity,
     * {@code similarity * weight} to each of the neighbor's training resources. The
     * resource-based part adds scores obtained from the {@link Utilities} similarity helpers.
     * When both modes are enabled the scores are summed.
     *
     * @param userID            target user, or {@code -1} if none is given
     * @param resID             target resource, or {@code -1} if none is given
     * @param sorting           forwarded to the similarity helpers; if {@code true} the result is
     *                          additionally ordered through a {@link DoubleMapComparator}
     * @param allResources      resource-based mode only: if {@code true}, similar resources are collected
     *                          for each of the user's resources; otherwise a single helper call for the
     *                          user (or for {@code resID}) is used
     * @param bll               if {@code true}, scores are weighted with the values returned by
     *                          {@code Bookmark.getResourcesFromUserWithRec} (presumably recency-based
     *                          weights - confirm against that helper)
     * @param filterOwnEntities if {@code true}, resources the user already has are skipped in the
     *                          user-based part and handed to the resource-based helpers for filtering
     * @param recommUsers       if {@code true}, a user ranking is returned instead of a resource ranking
     * @return map from resource (or user) id to score; empty if no branch produced any scores
     */
    public Map<Integer, Double> getRankedResourcesList(int userID, int resID, boolean sorting, boolean allResources, boolean bll,
            boolean filterOwnEntities, boolean recommUsers) {
        List<Integer> userResources = null;
        Map<Integer, Double> userBllResources = null;
        if (this.resBased) {
            userBllResources = Bookmark.getResourcesFromUserWithRec(this.trainList, this.testList, userID, 0.5, bll);
            userResources = new ArrayList<Integer>(userBllResources.keySet());
        } else if (userID != -1) {
            userResources = Bookmark.getResourcesFromUser(this.trainList, userID);
        }

        Map<Integer, Double> rankedResources = new LinkedHashMap<Integer, Double>();
        //double denom = 0.0;

        if (this.userBased && userID != -1) {
            Map<Integer, Double> sortedNeighbors = Utilities.getNeighbors(userID, -1, this.allUsers, this.userMaps, this.trainList, this.sim, !recommUsers);
            if (recommUsers) {
                return sortedNeighbors;
            }
            int neighborCount = 0;
            for (Map.Entry<Integer, Double> neighbor : sortedNeighbors.entrySet()) {
                // NOTE(review): this test admits MAX_NEIGHBORS + 1 entries; kept unchanged so that
                // existing results stay reproducible - confirm whether ">=" was intended.
                if (neighborCount++ > MAX_NEIGHBORS) {
                    break;
                }
                // Kept in a local of its own: the target user's map (userBllResources) is still
                // needed by the resource-based part below and must not be overwritten here.
                Map<Integer, Double> neighborBllResources = null;
                if (bll) {
                    neighborBllResources = Bookmark.getResourcesFromUserWithRec(this.trainList, this.testList, neighbor.getKey(), 0.5, false);
                }
                double similarity = neighbor.getValue();
                //denom += similarity;
                if (similarity != 0.0) {
                    List<Integer> neighborResources = Bookmark.getResourcesFromUser(this.trainList, neighbor.getKey());
                    for (Integer res : neighborResources) {
                        if (!filterOwnEntities || !userResources.contains(res)) {
                            // A resource without a weight counts with the neutral weight 1.0
                            // instead of failing on unboxing a null value.
                            double weight = 1.0;
                            if (neighborBllResources != null) {
                                Double recVal = neighborBllResources.get(res);
                                if (recVal != null) {
                                    weight = recVal.doubleValue();
                                }
                            }
                            addScore(rankedResources, res, weight * similarity);
                        }
                    }
                }
            }
        }

        if (this.resBased) {
            //denom = 0.0;
            Map<Integer, Double> sortedResources = null;
            if (allResources) {
                sortedResources = new LinkedHashMap<Integer, Double>();
                int resCount = 0;
                for (Map.Entry<Integer, Double> resEntry : userBllResources.entrySet()) {
                    // Same "> MAX_NEIGHBORS" cap as above (see NOTE(review) there).
                    if (resCount++ > MAX_NEIGHBORS) {
                        break;
                    }
                    int res = resEntry.getKey();
                    double weight = 1.0;
                    if (bll && resEntry.getValue() != null) {
                        weight = resEntry.getValue().doubleValue();
                    }
                    Map<Integer, Double> similarResources = Utilities.getSimResources(-1, res, userResources, this.allResources, this.resMaps, this.trainList, this.sim, sorting);
                    int simCount = 0;
                    for (Map.Entry<Integer, Double> entry : similarResources.entrySet()) {
                        if (simCount++ > MAX_NEIGHBORS) {
                            break;
                        }
                        addScore(sortedResources, entry.getKey(), weight * entry.getValue());
                    }
                }
            } else {
                if (recommUsers) {
                    List<Integer> resourceUsers = null;
                    if (filterOwnEntities) {
                        resourceUsers = Bookmark.getUsersFromResource(this.trainList, resID);
                    } else {
                        resourceUsers = new ArrayList<Integer>();
                    }
                    return Utilities.getSimUsersForResource(resID, this.allUsers, this.userMaps, this.resMaps, resourceUsers, this.sim, sorting);
                } else {
                    if (userID != -1) {
                        /*Map<Integer, Double> candidateSet = new LinkedHashMap<Integer, Double>();
                        for (Map.Entry<Integer, Double> entry : this.rankedResourceCalculator.getRankedResourcesList(userID, -1, true, false, false, true, false).entrySet()) {
                            if (candidateSet.size() < 100) {
                                candidateSet.put(entry.getKey(), entry.getValue());
                            }
                        }*/
                        sortedResources = Utilities.getSimResourcesForUser(userID, this.allResources, this.userMaps, this.resMaps,
                                filterOwnEntities ? userResources : new ArrayList<Integer>(), this.sim, sorting);
                    } else if (resID != -1) {
                        sortedResources = Utilities.getSimResources(-1, resID, null, this.allResources, this.resMaps, this.trainList, this.sim, sorting);
                    }
                }
            }
            // sortedResources stays null when neither a user nor a resource id was supplied.
            if (sortedResources != null) {
                for (Map.Entry<Integer, Double> sortedRes : sortedResources.entrySet()) {
                    addScore(rankedResources, sortedRes.getKey(), sortedRes.getValue());
                }
            }
        }

        // denom = 0.0;
        // // normalize
        // for (double val : rankedResources.values()) {
        //     denom += Math.exp(val);
        // }
        // for (Map.Entry<Integer, Double> entry : rankedResources.entrySet()) {
        //     entry.setValue(Math.exp(entry.getValue()) / denom);
        // }

        if (sorting) {
            // return the sorted resources
            Map<Integer, Double> sortedRankedResources = new TreeMap<Integer, Double>(new DoubleMapComparator(rankedResources));
            sortedRankedResources.putAll(rankedResources);
            return sortedRankedResources;
        } else {
            return rankedResources;
        }
    }

    /** Adds {@code value} to the score stored under {@code key}, starting from zero if absent. */
    private static void addScore(Map<Integer, Double> scores, Integer key, double value) {
        Double current = scores.get(key);
        scores.put(key, current != null ? current.doubleValue() + value : value);
    }

    // Statics -----------------------------------------------------------------------------------------------------------------------------------------------------------

    /** Accumulated timing/memory report, passed through the PerformanceMeasurement helpers. */
    private static String timeString;

    /**
     * Sets {@link #MAX_NEIGHBORS} to {@code neighborSize} and runs {@link #predictSample} with beta = 5.
     *
     * @return the reader that was used to load {@code filename}
     */
    public static BookmarkReader predictResources(String filename, int trainSize, int sampleSize, int neighborSize, boolean userBased, boolean resourceBased, boolean allResources, boolean bll, Features features, boolean writeTime) {
        MAX_NEIGHBORS = neighborSize;
        return predictSample(filename, trainSize, sampleSize, userBased, resourceBased, allResources, 5, bll, features, writeTime);
    }

    /**
     * Builds a calculator on the first {@code size - sampleSize} bookmarks and computes one ranked
     * resource map per user returned by {@code reader.getUniqueUserListFromTestSet}. The training
     * and test durations are recorded in the static time string.
     */
    private static List<Map<Integer, Double>> startBM25CreationForResourcesPrediction(BookmarkReader reader, int sampleSize, boolean userBased, boolean resBased, boolean allResources, boolean bll, Features features) {
        int size = reader.getBookmarks().size();
        int trainSize = size - sampleSize;

        Stopwatch timer = new Stopwatch();
        timer.start();
        CFResourceCalculator calculator = new CFResourceCalculator(reader, trainSize, false, userBased, resBased, 5, Similarity.COSINE, features);
        timer.stop();
        long trainingTime = timer.elapsed(TimeUnit.MILLISECONDS);

        timer.reset();
        timer.start();
        List<Map<Integer, Double>> results = new ArrayList<Map<Integer, Double>>();
        for (Integer userID : reader.getUniqueUserListFromTestSet(trainSize)) {
            results.add(calculator.getRankedResourcesList(userID, -1, true, allResources, bll, true, false)); // TODO
        }
        timer.stop();
        long testTime = timer.elapsed(TimeUnit.MILLISECONDS);

        timeString = PerformanceMeasurement.addTimeMeasurement(timeString, true, trainingTime, testTime, sampleSize);
        return results;
    }

    /**
     * Reads {@code filename}, computes resource predictions for the test users and writes them
     * through a {@link PredictionFileWriter}. While running, a {@link MemoryThread} is scheduled
     * on a {@link Timer}; the timer is cancelled on every exit path.
     *
     * @param writeTime if {@code true}, the time/memory report is written below {@code ./data/metrics/}
     * @return the reader that was used to load {@code filename}
     */
    public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize, boolean userBased, boolean resBased, boolean allResources,
            int beta, boolean bll, Features features, boolean writeTime) {
        Timer timerThread = new Timer();
        try {
            MemoryThread memoryThread = new MemoryThread();
            timerThread.schedule(memoryThread, 0, MemoryThread.TIME_SPAN);

            BookmarkReader reader = new BookmarkReader(trainSize, false);
            reader.readFile(filename);

            List<Map<Integer, Double>> cfValues = startBM25CreationForResourcesPrediction(reader, sampleSize, userBased, resBased, allResources, bll, features);

            List<int[]> predictionValues = new ArrayList<int[]>();
            for (Map<Integer, Double> modelVal : cfValues) {
                predictionValues.add(Ints.toArray(modelVal.keySet()));
                // just for debugging
                //System.out.println(modelVal.values().toString());
            }

            String suffix = "_cf_";
            if (!userBased) {
                suffix = "_rescf_";
            } else if (!resBased) {
                suffix = "_usercf_";
            }
            if (!userBased && !allResources) {
                suffix += "mixed_";
            }
            if (bll) {
                suffix += "bll_";
            }
            suffix += features + "_";

            PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues);
            writer.writeResourcePredictionsToFile(filename + suffix + beta, trainSize, MAX_NEIGHBORS);

            timeString = PerformanceMeasurement.addMemoryMeasurement(timeString, false, memoryThread.getMaxMemory());
            if (writeTime) {
                Utilities.writeStringToFile("./data/metrics/" + filename + suffix + beta + "_TIME.txt", timeString);
            }
            return reader;
        } finally {
            // The Timer's worker thread is not a daemon thread; without this it would keep
            // running (and keep the JVM alive) whenever one of the steps above throws.
            timerThread.cancel();
        }
    }
}