/*
 TagRecommender:
 A framework to implement and evaluate algorithms for the recommendation
 of tags.
 Copyright (C) 2013 Dominik Kowald

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package itemrecommendations;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;
import com.google.common.primitives.Ints;

import common.DoubleMapComparator;
import common.Features;
import common.MemoryThread;
import common.PerformanceMeasurement;
import common.Similarity;
import common.Bookmark;
import common.Utilities;
import file.PredictionFileWriter;
import file.BookmarkReader;

/**
 * Scores resources (or users) for a given user/resource from the similarity of
 * neighboring users and/or similar resources found in the training part of a
 * bookmark list.
 * <p>
 * The bookmark list supplied by the reader is split at {@code trainSize}: the
 * first {@code trainSize} bookmarks are the training list, the remainder the
 * test list. Depending on the {@code userBased} / {@code resBased} flags, the
 * per-user and/or per-resource feature maps are pre-computed in the
 * constructor.
 * <p>
 * NOTE(review): {@link #MAX_NEIGHBORS} is a mutable public static that
 * {@link #predictResources} overwrites, so concurrent evaluations with
 * different neighborhood sizes would interfere with each other.
 */
public class CFResourceCalculator {

    /**
     * Upper bound on the number of neighbors (and, in the resource-based
     * branch, on the number of own resources and of similar resources per own
     * resource) that contribute to a ranking.
     */
    public static int MAX_NEIGHBORS = 20;

    private final BookmarkReader reader;
    private final boolean userBased;
    private final boolean resBased;
    Similarity sim;
    private final List<Bookmark> trainList;
    private final List<Bookmark> testList;

    // Only populated when the user-based branch of the constructor runs.
    private List<Map<Integer, Double>> userMaps;
    private Map<Integer, Double> allUsers;
    // Only populated when the calculator is resource-based.
    private List<Map<Integer, Double>> resMaps;
    private Map<Integer, Double> allResources;

    // NOTE(review): never read by any active code in this class; it is only
    // built for resource-based TOPICS calculators. Kept so that construction
    // behaves as before - confirm whether it can be dropped.
    private CFResourceCalculator rankedResourceCalculator;

    /**
     * Builds the calculator and pre-computes the feature maps it needs.
     *
     * @param reader      source of the bookmarks
     * @param trainSize   number of leading bookmarks that form the training list;
     *                    the rest forms the test list
     * @param predictTags when {@code false}, the user maps are built even if the
     *                    calculator is not user-based
     * @param userBased   whether neighboring users contribute to the ranking
     * @param resBased    whether similar resources contribute to the ranking
     * @param beta        currently unused
     * @param sim         similarity measure handed to the similarity helpers
     * @param features    feature type the user/resource maps are built from
     */
    public CFResourceCalculator(BookmarkReader reader, int trainSize, boolean predictTags, boolean userBased,
            boolean resBased, int beta, Similarity sim, Features features) {
        this.reader = reader;
        this.userBased = userBased;
        this.resBased = resBased;
        this.sim = sim;
        // TODO (from original author): train list selection for the tag-prediction case
        this.trainList = this.reader.getBookmarks().subList(0, trainSize);
        this.testList = this.reader.getBookmarks().subList(trainSize, this.reader.getBookmarks().size());

        if (this.userBased || !predictTags) {
            this.userMaps = buildFeatureMaps(features, false);
            this.allUsers = Utilities.getAllEntities(this.trainList, false);
        }
        if (this.resBased) {
            this.resMaps = buildFeatureMaps(features, true);
            if (features == Features.TOPICS) {
                this.rankedResourceCalculator = new CFResourceCalculator(this.reader, trainSize, false, true, false,
                        5, Similarity.COSINE, Features.ENTITIES);
            }
            this.allResources = Utilities.getAllEntities(this.trainList, true);
        }
    }

    /**
     * Builds the feature maps for users ({@code forResources == false}) or
     * resources ({@code forResources == true}) from the training list.
     *
     * @return the maps for the given feature type, or {@code null} when the
     *         feature type is none of the four handled ones
     */
    private List<Map<Integer, Double>> buildFeatureMaps(Features features, boolean forResources) {
        if (features == Features.ENTITIES) {
            return Utilities.getUsedEntities(this.trainList, forResources, null);
        } else if (features == Features.TOPICS) {
            return Utilities.getRelativeTopicMaps(this.trainList, forResources);
        } else if (features == Features.TAGS) {
            return Utilities.getRelativeTagMaps(this.trainList, forResources);
        } else if (features == Features.TAG_ENTITIES) {
            return Utilities.getUsedEntities(this.trainList, forResources,
                    Utilities.getRelativeTagMaps(this.trainList, forResources));
        }
        return null;
    }

    /**
     * Computes scores for candidate resources (or, with {@code recommUsers},
     * for candidate users).
     *
     * @param userID            target user, or {@code -1} for none
     * @param resID             target resource, or {@code -1} for none
     * @param sorting           forwarded to the similarity helpers; when
     *                          {@code true} the result is additionally ordered by
     *                          {@link DoubleMapComparator}
     * @param allResources      in the resource-based branch: expand every own
     *                          resource of the user into its similar resources
     *                          instead of using a single user/resource profile
     * @param bll               weight contributions by the values returned from
     *                          {@code Bookmark.getResourcesFromUserWithRec}
     *                          (presumably a recency/base-level weighting - confirm)
     * @param filterOwnEntities skip resources (or users) already associated with
     *                          the target
     * @param recommUsers       return similar users instead of resources
     * @return map from resource (or user) id to its accumulated score
     */
    public Map<Integer, Double> getRankedResourcesList(int userID, int resID, boolean sorting, boolean allResources,
            boolean bll, boolean filterOwnEntities, boolean recommUsers) {
        List<Integer> userResources = null;
        Map<Integer, Double> userBllResources = null;
        if (this.resBased) {
            userBllResources = Bookmark.getResourcesFromUserWithRec(this.trainList, this.testList, userID, 0.5, bll);
            userResources = new ArrayList<Integer>(userBllResources.keySet());
        } else if (userID != -1) {
            userResources = Bookmark.getResourcesFromUser(this.trainList, userID);
        }

        Map<Integer, Double> rankedResources = new LinkedHashMap<Integer, Double>();

        if (this.userBased && userID != -1) {
            Map<Integer, Double> sortedNeighbors = Utilities.getNeighbors(userID, -1, this.allUsers, this.userMaps,
                    this.trainList, this.sim, !recommUsers);
            if (recommUsers) {
                return sortedNeighbors;
            }
            int neighborCount = 0;
            for (Map.Entry<Integer, Double> neighbor : sortedNeighbors.entrySet()) {
                // ">=" so that at most MAX_NEIGHBORS neighbors contribute
                // (the former ">" let MAX_NEIGHBORS + 1 through).
                if (neighborCount++ >= MAX_NEIGHBORS) {
                    break;
                }
                // Kept in its own variable: reusing userBllResources here would
                // replace the target user's map that the resource-based branch
                // below still iterates.
                Map<Integer, Double> neighborBllResources = null;
                if (bll) {
                    neighborBllResources = Bookmark.getResourcesFromUserWithRec(this.trainList, this.testList,
                            neighbor.getKey(), 0.5, false);
                }
                double similarity = neighbor.getValue();
                if (similarity != 0.0) {
                    List<Integer> resources = Bookmark.getResourcesFromUser(this.trainList, neighbor.getKey());
                    for (Integer res : resources) {
                        if (filterOwnEntities && userResources.contains(res)) {
                            continue;
                        }
                        // A resource without a weight counts with the neutral
                        // factor 1.0 instead of failing on unboxing null.
                        double bllVal = 1.0;
                        if (neighborBllResources != null) {
                            Double weight = neighborBllResources.get(res);
                            if (weight != null) {
                                bllVal = weight.doubleValue();
                            }
                        }
                        double entryVal = bllVal * similarity;
                        Double val = rankedResources.get(res);
                        rankedResources.put(res, val != null ? val.doubleValue() + entryVal : entryVal);
                    }
                }
            }
        }

        if (this.resBased) {
            Map<Integer, Double> sortedResources = null;
            if (allResources) {
                sortedResources = new LinkedHashMap<Integer, Double>();
                int resCount = 0;
                for (Map.Entry<Integer, Double> resEntry : userBllResources.entrySet()) {
                    if (resCount++ >= MAX_NEIGHBORS) {
                        break;
                    }
                    int res = resEntry.getKey();
                    Double weight = resEntry.getValue();
                    double bllVal = (bll && weight != null) ? weight.doubleValue() : 1.0;
                    Map<Integer, Double> resources = Utilities.getSimResources(-1, res, userResources,
                            this.allResources, this.resMaps, this.trainList, this.sim, sorting);
                    int simCount = 0;
                    for (Map.Entry<Integer, Double> entry : resources.entrySet()) {
                        if (simCount++ >= MAX_NEIGHBORS) {
                            break;
                        }
                        double entryVal = bllVal * entry.getValue();
                        Double val = sortedResources.get(entry.getKey());
                        sortedResources.put(entry.getKey(), val != null ? val.doubleValue() + entryVal : entryVal);
                    }
                }
            } else if (recommUsers) {
                List<Integer> resourceUsers;
                if (filterOwnEntities) {
                    resourceUsers = Bookmark.getUsersFromResource(this.trainList, resID);
                } else {
                    resourceUsers = new ArrayList<Integer>();
                }
                return Utilities.getSimUsersForResource(resID, this.allUsers, this.userMaps, this.resMaps,
                        resourceUsers, this.sim, sorting);
            } else if (userID != -1) {
                sortedResources = Utilities.getSimResourcesForUser(userID, this.allResources, this.userMaps,
                        this.resMaps, filterOwnEntities ? userResources : new ArrayList<Integer>(), this.sim,
                        sorting);
            } else if (resID != -1) {
                sortedResources = Utilities.getSimResources(-1, resID, null, this.allResources, this.resMaps,
                        this.trainList, this.sim, sorting);
            }

            // sortedResources stays null when neither a user nor a resource id
            // was given; there is nothing to merge in that case.
            if (sortedResources != null) {
                for (Map.Entry<Integer, Double> sortedRes : sortedResources.entrySet()) {
                    Double val = rankedResources.get(sortedRes.getKey());
                    rankedResources.put(sortedRes.getKey(),
                            val != null ? val.doubleValue() + sortedRes.getValue() : sortedRes.getValue());
                }
            }
        }

        if (sorting) {
            // Ordering is defined by DoubleMapComparator over the score map
            // (presumably descending by score - confirm in that class).
            Map<Integer, Double> sortedRankedResources = new TreeMap<Integer, Double>(
                    new DoubleMapComparator(rankedResources));
            sortedRankedResources.putAll(rankedResources);
            return sortedRankedResources;
        }
        return rankedResources;
    }

    // Statics ------------------------------------------------------------------------------------------------------

    private static String timeString;

    /**
     * Sets {@link #MAX_NEIGHBORS} to {@code neighborSize} and runs
     * {@link #predictSample} with a fixed {@code beta} of 5.
     *
     * @return the reader that was used to load {@code filename}
     */
    public static BookmarkReader predictResources(String filename, int trainSize, int sampleSize, int neighborSize,
            boolean userBased, boolean resourceBased, boolean allResources, boolean bll, Features features,
            boolean writeTime) {
        MAX_NEIGHBORS = neighborSize;
        return predictSample(filename, trainSize, sampleSize, userBased, resourceBased, allResources, 5, bll,
                features, writeTime);
    }

    /**
     * Trains a calculator on all but the last {@code sampleSize} bookmarks and
     * computes one sorted ranking per unique test-set user. Training and test
     * durations are appended to {@code timeString}.
     */
    private static List<Map<Integer, Double>> startBM25CreationForResourcesPrediction(BookmarkReader reader,
            int sampleSize, boolean userBased, boolean resBased, boolean allResources, boolean bll,
            Features features) {
        int size = reader.getBookmarks().size();
        int trainSize = size - sampleSize;

        Stopwatch timer = new Stopwatch();
        timer.start();
        CFResourceCalculator calculator = new CFResourceCalculator(reader, trainSize, false, userBased, resBased, 5,
                Similarity.COSINE, features);
        timer.stop();
        long trainingTime = timer.elapsed(TimeUnit.MILLISECONDS);

        timer.reset();
        timer.start();
        List<Map<Integer, Double>> results = new ArrayList<Map<Integer, Double>>();
        for (Integer userID : reader.getUniqueUserListFromTestSet(trainSize)) {
            // TODO (from original author)
            results.add(calculator.getRankedResourcesList(userID, -1, true, allResources, bll, true, false));
        }
        timer.stop();
        long testTime = timer.elapsed(TimeUnit.MILLISECONDS);

        timeString = PerformanceMeasurement.addTimeMeasurement(timeString, true, trainingTime, testTime, sampleSize);
        return results;
    }

    /**
     * Reads {@code filename}, computes resource rankings for the test users and
     * writes them to a prediction file whose name encodes the chosen variant.
     * While running, a {@link MemoryThread} is scheduled on a timer to track
     * memory usage; with {@code writeTime} the collected measurements are
     * written to {@code ./data/metrics/}.
     *
     * @return the reader that was used to load {@code filename}
     */
    public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize, boolean userBased,
            boolean resBased, boolean allResources, int beta, boolean bll, Features features, boolean writeTime) {
        Timer timerThread = new Timer();
        try {
            MemoryThread memoryThread = new MemoryThread();
            timerThread.schedule(memoryThread, 0, MemoryThread.TIME_SPAN);

            BookmarkReader reader = new BookmarkReader(trainSize, false);
            reader.readFile(filename);

            List<Map<Integer, Double>> cfValues = startBM25CreationForResourcesPrediction(reader, sampleSize,
                    userBased, resBased, allResources, bll, features);

            List<int[]> predictionValues = new ArrayList<int[]>();
            for (Map<Integer, Double> modelVal : cfValues) {
                predictionValues.add(Ints.toArray(modelVal.keySet()));
            }

            String suffix = "_cf_";
            if (!userBased) {
                suffix = "_rescf_";
            } else if (!resBased) {
                suffix = "_usercf_";
            }
            if (!userBased && !allResources) {
                suffix += "mixed_";
            }
            if (bll) {
                suffix += "bll_";
            }
            suffix += features + "_";

            PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues);
            writer.writeResourcePredictionsToFile(filename + suffix + beta, trainSize, MAX_NEIGHBORS);

            timeString = PerformanceMeasurement.addMemoryMeasurement(timeString, false,
                    memoryThread.getMaxMemory());
            if (writeTime) {
                Utilities.writeStringToFile("./data/metrics/" + filename + suffix + beta + "_TIME.txt", timeString);
            }
            return reader;
        } finally {
            // Always stop the timer: its background thread is not a daemon and
            // would otherwise outlive a failed run.
            timerThread.cancel();
        }
    }
}