package processing;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;
import com.google.common.primitives.Ints;
import common.CalculationType;
import common.CooccurenceMatrix;
import common.DoubleMapComparator;
import common.Bookmark;
import common.MemoryThread;
import common.PerformanceMeasurement;
import common.Utilities;
import file.PredictionFileWriter;
import file.BookmarkReader;
public class ThreeLTCalculator {
// Source of bookmarks, tags and tag counts used by this calculator.
private BookmarkReader reader;
// Training portion of the reader's bookmarks: the sub-list [0, trainSize).
private List<Bookmark> trainList;
// Mixing weight of the final score: beta * userScore + (1 - beta) * resourceScore.
private double beta; // constructor argument divided by 10.0
// Decay exponent of the recency terms: recency = (timeDifference + 1) ^ (-dValue).
private double dValue; // constructor argument divided by 10.0
// When true, getRankedTagList computes the user component.
private boolean userBased;
// When true, getRankedTagList looks up the resource component in resMaps.
private boolean resBased;
// When true, getResultMap multiplies every bookmark weight by getBookmarkBLL(...).
private boolean bookmarkBLL;
// Result of Utilities.getBookmarks(trainList, false); accessed by user id.
private List<List<Bookmark>> userBookmarks;
// Result of BLLCalculator.getArtifactMaps(...); accessed by resource id (package-private).
List<Map<Integer, Double>> resMaps;
// Co-occurrence matrix; only created when cType == USER_TO_RESOURCE, otherwise null.
private CooccurenceMatrix rMatrix;
// Calculation type; USER_TO_RESOURCE enables the associative step in getRankedTagList.
private CalculationType cType;
// Result of Utilities.getRelativeTagMaps(trainList, false); accessed by user id.
private List<Map<Integer, Double>> userCounts;
// Result of Utilities.getRelativeTagMaps(trainList, true); accessed by resource id.
private List<Map<Integer, Double>> resCounts;
/**
 * Returns the per-user tag maps computed in the constructor via
 * {@code Utilities.getRelativeTagMaps(trainList, false)}.
 *
 * @return the internal list itself (not a copy); element {@code i} belongs to user id {@code i}
 */
public List<Map<Integer, Double>> getUserMaps() {
    return userCounts;
}
public ThreeLTCalculator(BookmarkReader reader, int trainSize, int dValue, int beta, boolean userBased, boolean resBased, boolean bookmarkBLL, CalculationType cType) {
this.reader = reader;
this.trainList = this.reader.getBookmarks().subList(0, trainSize);
this.userBookmarks = Utilities.getBookmarks(this.trainList, false);
this.beta = (double)beta / 10.0;
this.dValue = (double)dValue / 10.0;
this.userBased = userBased;
this.resBased = resBased;
this.bookmarkBLL = bookmarkBLL;
this.cType = cType;
this.resMaps = BLLCalculator.getArtifactMaps(reader, this.trainList, null, true, new ArrayList<Long>(), new ArrayList<Double>(), 0, true, null);
this.userCounts = Utilities.getRelativeTagMaps(this.trainList, false);
this.resCounts = Utilities.getRelativeTagMaps(this.trainList, true);
if (this.cType == CalculationType.USER_TO_RESOURCE) {
this.rMatrix = new CooccurenceMatrix(this.trainList, this.reader.getTagCounts(), true);
}
}
/**
 * Computes a recency value for every tag (or category) that occurs in the given bookmarks.
 * <p>
 * For each key the latest bookmark timestamp is determined first; it is then replaced by
 * {@code (timestamp - latest + 1) ^ (-dValue)}. Results that are NaN or infinite are
 * reported on stdout and stored as {@code 0.0}.
 *
 * @param bookmarks  bookmarks to scan
 * @param timestamp  reference time the recency is measured against
 * @param categories {@code true} to use each bookmark's categories as keys, {@code false} for its tags
 * @return map from key to recency value, in order of first occurrence
 */
private Map<Integer, Double> getLastUsages(List<Bookmark> bookmarks, double timestamp, boolean categories) {
    Map<Integer, Double> recencies = new LinkedHashMap<Integer, Double>();

    // Pass 1: remember the most recent usage time per key.
    for (Bookmark bookmark : bookmarks) {
        List<Integer> ids;
        if (categories) {
            ids = bookmark.getCategories();
        } else {
            ids = bookmark.getTags();
        }
        double usedAt = Double.parseDouble(bookmark.getTimestamp());
        for (int id : ids) {
            Double latest = recencies.get(id);
            if (latest == null || usedAt > latest.doubleValue()) {
                recencies.put(id, usedAt);
            }
        }
    }

    // Pass 2: turn the stored timestamps into power-law recency values.
    final double exponent = -this.dValue;
    for (Map.Entry<Integer, Double> slot : recencies.entrySet()) {
        double recency = Math.pow(timestamp - slot.getValue() + 1.0, exponent);
        //double recency = Math.exp((timestamp - slot.getValue() + 1.0) * -1.0);
        if (Double.isInfinite(recency) || Double.isNaN(recency)) {
            System.out.println("BLL - NAN");
            slot.setValue(0.0);
        } else {
            slot.setValue(recency);
        }
    }
    return recencies;
}
/**
 * Scores tags for a user/resource pair and returns the first {@code limit} entries.
 * <p>
 * Steps:
 * <ol>
 * <li>User component (only if {@code userBased}): {@code getResultMap} over the user's training
 *     bookmarks; for {@code USER_TO_RESOURCE} the associative values from the co-occurrence
 *     matrix are added and the map is re-normalized by its sum.</li>
 * <li>Resource component (only if {@code resBased}): the precomputed map in {@code resMaps}.</li>
 * <li>Merge: {@code beta * userScore + (1 - beta) * resourceScore} for every tag id that has a
 *     non-zero score in at least one component.</li>
 * </ol>
 * An id of {@code -1} or an id beyond the training data is treated as unknown: the user
 * component is then computed from an empty bookmark list, the resource component is empty and
 * the associative step is skipped.
 *
 * @param userID        user id, or -1 if unknown
 * @param resID         resource id, or -1 if unknown
 * @param testCats      categories of the bookmark to predict for
 * @param testTimestamp timestamp of the bookmark to predict for
 * @param limit         maximum number of entries in the returned map
 * @param tagBLL        whether tag recency values of the user are applied
 * @param topicBLL      whether category recency values of the user are applied
 * @param sorting       {@code true} to order the result with {@code DoubleMapComparator}
 *                      (presumably by score - confirm against that class); {@code false} keeps
 *                      ascending tag-id order
 * @return at most {@code limit} tag-id/score entries
 */
public Map<Integer, Double> getRankedTagList(int userID, int resID, List<Integer> testCats, double testTimestamp, int limit, boolean tagBLL, boolean topicBLL, boolean sorting) {
    // ---- user component ----
    Map<Integer, Double> userResultMap;
    if (this.userBased) {
        List<Bookmark> userB;
        Map<Integer, Double> userCount;
        Map<Integer, Double> userTagMap = null;
        Map<Integer, Double> userCatMap = null;
        if (userID != -1 && userID < this.userBookmarks.size()) {
            userB = this.userBookmarks.get(userID);
            userCount = this.userCounts.get(userID);
            if (tagBLL) {
                userTagMap = getLastUsages(userB, testTimestamp, false);
            }
            if (topicBLL) {
                userCatMap = getLastUsages(userB, testTimestamp, true);
            }
        } else {
            // unknown user: nothing to learn from
            userB = new ArrayList<Bookmark>();
            userCount = new LinkedHashMap<Integer, Double>();
        }
        userResultMap = getResultMap(userB, testCats, userTagMap, userCatMap, testTimestamp, topicBLL);

        // Associative step. The resID != -1 guard matches the unknown-id handling used for
        // users and for resMaps; without it resCounts.get(-1) throws IndexOutOfBoundsException.
        if (this.cType == CalculationType.USER_TO_RESOURCE && resID != -1 && resID < this.resCounts.size()) {
            Map<Integer, Double> resCount = this.resCounts.get(resID);
            Map<Integer, Double> associativeValues = this.rMatrix.calculateAssociativeComponentsWithTagAssosiation(userCount, resCount, false, true, false);
            for (Map.Entry<Integer, Double> entry : associativeValues.entrySet()) {
                Double val = userResultMap.get(entry.getKey());
                userResultMap.put(entry.getKey(), val == null ? entry.getValue().doubleValue() : val.doubleValue() + entry.getValue().doubleValue());
            }
            // Re-normalize by the sum; exp(log(x)) is kept as in the original formulation.
            double denom = 0.0;
            for (Map.Entry<Integer, Double> entry : userResultMap.entrySet()) {
                denom += Math.exp(Math.log(entry.getValue()));
            }
            for (Map.Entry<Integer, Double> entry : userResultMap.entrySet()) {
                entry.setValue(Math.exp(Math.log(entry.getValue())) / denom);
            }
        }
    } else {
        userResultMap = new LinkedHashMap<Integer, Double>();
    }

    // ---- resource component ----
    Map<Integer, Double> resResultMap = null;
    if (this.resBased && this.resMaps != null) {
        if (resID != -1 && resID < this.resMaps.size()) {
            resResultMap = this.resMaps.get(resID);
        } else {
            resResultMap = new LinkedHashMap<Integer, Double>();
        }
    }

    // ---- merge user and resource results ----
    Map<Integer, Double> resultMap = new LinkedHashMap<Integer, Double>();
    final int tagCount = this.reader.getTags().size();
    for (int i = 0; i < tagCount; i++) {
        Double userScore = userResultMap.get(i);
        Double resScore = (resResultMap != null) ? resResultMap.get(i) : null;
        double userVal = (userScore != null) ? userScore.doubleValue() : 0.0;
        double resVal = (resScore != null) ? resScore.doubleValue() : 0.0;
        if (userVal != 0.0 || resVal != 0.0) {
            resultMap.put(i, this.beta * userVal + (1.0 - this.beta) * resVal);
        }
    }

    // ---- sort (optional) and cut off after `limit` entries ----
    Map<Integer, Double> sortedResultMap;
    if (sorting) {
        sortedResultMap = new TreeMap<Integer, Double>(new DoubleMapComparator(resultMap));
        sortedResultMap.putAll(resultMap);
    } else {
        sortedResultMap = resultMap;
    }
    Map<Integer, Double> returnMap = new LinkedHashMap<Integer, Double>();
    int count = 0;
    for (Map.Entry<Integer, Double> entry : sortedResultMap.entrySet()) {
        if (count++ >= limit) {
            break;
        }
        returnMap.put(entry.getKey(), entry.getValue());
    }
    return returnMap;
}
/**
 * Builds a tag ranking over all bookmarks of the reader, weighting each bookmark by the cubed
 * cosine similarity between {@code testCats} and the topics of the bookmark's resource.
 * <p>
 * A weight that is NaN or infinite is reported on stdout and treated as zero; a zero weight is
 * replaced by {@code 0.1}. Bookmarks whose resource id is not covered by
 * {@code Utilities.getResTopics} are skipped.
 * <p>
 * Note: {@code testTimestamp}, {@code limit}, {@code tagBLL} and {@code topicBLL} are not used
 * by this method; in particular the result is not truncated to {@code limit}.
 *
 * @param testCats      categories to compare every resource against
 * @param testTimestamp unused
 * @param limit         unused
 * @param tagBLL        unused
 * @param topicBLL      unused
 * @return all scored tags, in the order produced by {@code DoubleMapComparator}
 */
public Map<Integer, Double> getCollectiveRankedTagList(List<Integer> testCats, double testTimestamp, int limit, boolean tagBLL, boolean topicBLL) {
    List<Bookmark> bookmarks = this.reader.getBookmarks();
    List<Map<Integer, Integer>> resTopics = Utilities.getResTopics(bookmarks);

    Map<Integer, Double> tagScores = new LinkedHashMap<Integer, Double>();
    for (Bookmark bookmark : bookmarks) {
        if (bookmark.getResourceID() >= resTopics.size()) {
            continue;
        }
        List<Integer> topics = new ArrayList<Integer>(resTopics.get(bookmark.getResourceID()).keySet());
        double weight = Math.pow(Utilities.getCosineSimList(testCats, topics), 3);
        if (Double.isNaN(weight) || Double.isInfinite(weight)) {
            System.out.println("Cos - NAN");
            weight = 0.0;
        }
        if (weight == 0.0) {
            // every bookmark contributes at least a small amount
            weight = 0.1;
        }
        for (int tag : bookmark.getTags()) {
            Double soFar = tagScores.get(tag);
            if (soFar == null) {
                tagScores.put(tag, weight);
            } else {
                tagScores.put(tag, soFar.doubleValue() + weight);
            }
        }
    }

    Map<Integer, Double> ordered = new TreeMap<Integer, Double>(new DoubleMapComparator(tagScores));
    ordered.putAll(tagScores);

    // Copy into an insertion-ordered map; no limit is applied here.
    Map<Integer, Double> ranked = new LinkedHashMap<Integer, Double>();
    for (Map.Entry<Integer, Double> scored : ordered.entrySet()) {
        ranked.put(scored.getKey(), scored.getValue());
    }
    return ranked;
}
/**
 * Accumulates a normalized tag score map from a list of bookmarks.
 * <p>
 * Each bookmark gets a weight equal to the cubed cosine similarity between its categories and
 * {@code testCats} (NaN/infinite is reported on stdout and becomes 0). With {@code topicBLL}
 * the weight is multiplied by the sum of the bookmark's category values from
 * {@code getMapFromList(categories, userCatMap)}; with {@code bookmarkBLL} it is also
 * multiplied by {@code getBookmarkBLL}. Every tag of the bookmark then receives
 * {@code weight * tagValue}, where the tag value comes from
 * {@code getMapFromList(tags, userTagMap)}. Finally all scores are divided by their sum.
 *
 * @param bookmarks     bookmarks to aggregate
 * @param testCats      categories of the bookmark to predict for
 * @param userTagMap    tag recency values, may be {@code null}
 * @param userCatMap    category recency values, may be {@code null}
 * @param testTimestamp timestamp of the bookmark to predict for
 * @param topicBLL      whether category recency is folded into the bookmark weight
 * @return map from tag id to normalized score, in order of first occurrence
 */
private Map<Integer, Double> getResultMap(List<Bookmark> bookmarks, List<Integer> testCats, Map<Integer, Double> userTagMap, Map<Integer, Double> userCatMap, double testTimestamp, boolean topicBLL) {
    Map<Integer, Double> scores = new LinkedHashMap<Integer, Double>();
    Map<Integer, Double> testCatsMap = getMapFromList(testCats, null);

    for (Bookmark bookmark : bookmarks) {
        // similarity part (categories are compared with uniform values)
        Map<Integer, Double> bookmarkCats = getMapFromList(bookmark.getCategories(), null);
        double similarity = Utilities.getCosineFloatSim(bookmarkCats, testCatsMap);
        double weight = Math.pow(similarity, 3);
        if (Double.isNaN(weight) || Double.isInfinite(weight)) {
            System.out.println("Cos - NAN");
            weight = 0.0;
        }

        // optional category recency part
        if (topicBLL) {
            double topicRecSum = 0.0;
            for (double catRec : getMapFromList(bookmark.getCategories(), userCatMap).values()) {
                topicRecSum += catRec;
            }
            weight *= topicRecSum;
        }

        // optional bookmark recency part
        if (this.bookmarkBLL) {
            weight *= getBookmarkBLL(bookmark, testTimestamp);
        }

        // spread the bookmark weight over its tags
        for (Map.Entry<Integer, Double> tag : getMapFromList(bookmark.getTags(), userTagMap).entrySet()) {
            double contribution = weight * tag.getValue().doubleValue();
            Double soFar = scores.get(tag.getKey());
            scores.put(tag.getKey(), soFar == null ? contribution : soFar.doubleValue() + contribution);
        }
    }

    // Normalize: store log-values first, then exponentiate and divide by the sum.
    // NOTE(review): a score of exactly 0.0 keeps log-value 0.0 and therefore counts as
    // exp(0) = 1 in the sum and in the result - confirm this is intended.
    double denom = 0.0;
    for (Map.Entry<Integer, Double> slot : scores.entrySet()) {
        double raw = slot.getValue();
        double logValue = (raw != 0.0) ? Math.log(raw) : 0.0;
        denom += Math.exp(logValue);
        slot.setValue(logValue);
    }
    for (Map.Entry<Integer, Double> slot : scores.entrySet()) {
        slot.setValue(Math.exp(slot.getValue()) / denom);
    }
    return scores;
}
/**
 * Computes the recency factor of a single bookmark:
 * {@code log((testTimestamp - bookmarkTimestamp + 1) ^ (-dValue) + 1)}.
 * If the power term is NaN or infinite, a message is printed and {@code log(1.0)} is returned.
 *
 * @param data          bookmark whose timestamp is evaluated
 * @param testTimestamp reference time
 * @return the recency factor
 */
private double getBookmarkBLL(Bookmark data, double testTimestamp) {
    double age = testTimestamp - Double.parseDouble(data.getTimestamp()) + 1.0;
    double recency = Math.pow(age, -this.dValue);
    if (Double.isInfinite(recency) || Double.isNaN(recency)) {
        System.out.println("Bookmark-BLL - NAN");
        return Math.log(1.0);
    }
    return Math.log(recency + 1.0);
}
/**
 * Maps every key of the list to a log-scaled value.
 * <p>
 * If {@code values} contains the key, the result is {@code log(value + 1)}; if that is NaN or
 * infinite, or the key is missing, or {@code values} is {@code null}, the default
 * {@code log(2)} is used.
 *
 * @param keys   keys to include, in this order (duplicates collapse to one entry)
 * @param values optional source values, may be {@code null}
 * @return insertion-ordered map from key to log-scaled value
 */
private Map<Integer, Double> getMapFromList(List<Integer> keys, Map<Integer, Double> values) {
    final double fallback = Math.log(1.0 + 1.0);
    Map<Integer, Double> weights = new LinkedHashMap<Integer, Double>();
    for (int key : keys) {
        double weight = fallback;
        if (values != null && values.containsKey(key)) {
            double candidate = Math.log(values.get(key) + 1.0);
            if (!Double.isNaN(candidate) && !Double.isInfinite(candidate)) {
                weight = candidate;
            }
        }
        weights.put(key, weight);
    }
    return weights;
}
/*
private Map<Integer, Double> getAllUsages(List<Bookmark> bookmarks, double timestamp, boolean categories) {
Map<Integer, Double> usageMap = new LinkedHashMap<Integer, Double>();
for (Bookmark data : bookmarks) {
List<Integer> keys = (categories ? data.getCategories() : data.getTags());
double targetTimestamp = Double.parseDouble(data.getTimestamp());
Double rec = Math.pow(timestamp - targetTimestamp + 1.0, this.dValue * (-1.0));
if (!rec.isInfinite() && !rec.isNaN()) {
for (int key : keys) {
Double oldVal = usageMap.get(key);
usageMap.put(key, (oldVal != null ? oldVal + rec : rec));
}
} else {
System.out.println("BLL - NAN");
}
}
return usageMap;
}
*/
// Statics -----------------------------------------------------------------------------------------------------------------------
// Measurement report shared across predictSample calls: each call passes the current value to
// PerformanceMeasurement.addTimeMeasurement/addMemoryMeasurement, stores the returned string
// here and writes it to the "_TIME.txt" metrics file.
private static String timeString;
/**
 * Trains a calculator on the first {@code trainSize} bookmarks of {@code filename}, predicts
 * up to 10 tags for each of the following {@code sampleSize} bookmarks and writes the
 * predictions plus a time/memory report to disk.
 * <p>
 * The prediction file name is {@code filename + suffix + "_" + beta + "_" + d}; the report goes
 * to {@code ./data/metrics/<that name>_TIME.txt}. The calculator is always created with
 * {@code bookmarkBLL = false}.
 * <p>
 * The memory-sampling timer is cancelled in a {@code finally} block so that a failure while
 * reading, predicting or writing does not leave its thread running.
 *
 * @param filename   name of the input file passed to {@code BookmarkReader.readFile}
 * @param trainSize  number of leading bookmarks used for training
 * @param sampleSize number of bookmarks after the training part to predict for
 * @param d          decay exponent scaled by ten
 * @param beta       user/resource mixing weight scaled by ten
 * @param userBased  whether the user component is used
 * @param resBased   whether the resource component is used
 * @param tagBLL     whether tag recency is applied
 * @param topicBLL   whether category recency is applied
 * @param cType      calculation type
 * @return the reader that was used, with its test lines set
 */
public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize, int d, int beta, boolean userBased, boolean resBased,
        boolean tagBLL, boolean topicBLL, CalculationType cType) {
    Timer timerThread = new Timer();
    try {
        MemoryThread memoryThread = new MemoryThread();
        timerThread.schedule(memoryThread, 0, MemoryThread.TIME_SPAN);

        BookmarkReader reader = new BookmarkReader(trainSize, false);
        reader.readFile(filename);
        List<int[]> predictionValues = new ArrayList<int[]>();

        // training phase
        Stopwatch timer = new Stopwatch();
        timer.start();
        ThreeLTCalculator calculator = new ThreeLTCalculator(reader, trainSize, d, beta, userBased, resBased, false, cType);
        timer.stop();
        long trainingTime = timer.elapsed(TimeUnit.MILLISECONDS);

        // test phase
        timer.reset();
        timer.start();
        for (int i = trainSize; i < trainSize + sampleSize; i++) { // the test-set
            Bookmark data = reader.getBookmarks().get(i);
            // parsed as double, like the training timestamps in getLastUsages/getBookmarkBLL
            double timestamp = Double.parseDouble(data.getTimestamp());
            Map<Integer, Double> map = calculator.getRankedTagList(data.getUserID(), data.getResourceID(), data.getCategories(), timestamp, 10, tagBLL, topicBLL, true);
            predictionValues.add(Ints.toArray(map.keySet()));
        }
        timer.stop();
        long testTime = timer.elapsed(TimeUnit.MILLISECONDS);
        timeString = PerformanceMeasurement.addTimeMeasurement(timeString, true, trainingTime, testTime, sampleSize);

        // write predictions
        String outputFile = filename + buildOutputSuffix(userBased, resBased, tagBLL, topicBLL, cType) + "_" + beta + "_" + d;
        reader.setTestLines(reader.getBookmarks().subList(trainSize, reader.getBookmarks().size()));
        PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues);
        writer.writeFile(outputFile);

        // write the time/memory report
        timeString = PerformanceMeasurement.addMemoryMeasurement(timeString, false, memoryThread.getMaxMemory());
        Utilities.writeStringToFile("./data/metrics/" + outputFile + "_TIME.txt", timeString);
        return reader;
    } finally {
        timerThread.cancel();
    }
}

/** Builds the file-name suffix that encodes which components and options were active. */
private static String buildOutputSuffix(boolean userBased, boolean resBased, boolean tagBLL, boolean topicBLL, CalculationType cType) {
    String suffix = "_layers";
    if (!userBased) {
        suffix = "_reslayers";
    } else if (!resBased) {
        suffix = "_userlayers";
    }
    if (tagBLL && topicBLL) {
        suffix += "bll";
    } else if (tagBLL) {
        suffix += "tagbll";
    } else if (topicBLL) {
        suffix += "topicbll";
    }
    if (cType == CalculationType.USER_TO_RESOURCE) {
        suffix += "ac";
    }
    return suffix;
}
}