/* TagRecommender: A framework to implement and evaluate algorithms for the recommendation of tags. Copyright (C) 2013 Dominik Kowald, Emanuel Lacic This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ package itemrecommendations; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; import processing.BLLCalculator; import com.google.common.base.Stopwatch; import com.google.common.primitives.Ints; import common.DoubleMapComparator; import common.Features; import common.Similarity; import common.Bookmark; import common.Utilities; import file.PredictionFileWriter; import file.BookmarkReader; public class CIRTTCalculator { public static int MAX_NEIGHBORS = 20; private BookmarkReader reader; private List<Bookmark> trainList; private List<Bookmark> testList; private Similarity sim; private Features features; private boolean userSim; private boolean bll; private boolean novelty; private List<Map<Integer, Double>> userMaps; //private List<Map<Integer, Double>> userTags; private List<Map<Integer, Double>> userTopics; private Map<Integer, Double> allUsers; //private List<Map<Integer, Double>> resMaps; //private List<Map<Integer, Double>> resTags; private List<Map<Integer, Double>> resTopics; // private Map<Integer, Double> allResources; private List<Map<Integer, Double>> bllValues; private boolean calculateOnTags; public CIRTTCalculator(BookmarkReader reader, int trainSize, int sampleSize, Similarity sim, Features features, boolean userSim, boolean bll, boolean novelty, boolean calculateOnTags) { this.reader = reader; this.trainList = this.reader.getBookmarks().subList(0, trainSize); this.testList = this.reader.getBookmarks().subList(trainSize, trainSize + sampleSize); this.sim = sim; this.features = features; this.userSim = userSim; this.bll = bll; this.novelty = novelty; this.calculateOnTags = calculateOnTags; if (this.features == Features.ENTITIES) { // TODO: try tag values for cosine! this.userMaps = Utilities.getUsedEntities(this.trainList, false, null); } else if (this.features == Features.TAGS) { this.userMaps = Utilities.getRelativeTagMaps(this.trainList, false); } else { this.userMaps = Utilities.getRelativeTopicMaps(this.trainList, false); } this.userTopics = Utilities.getUniqueTopicMaps(this.trainList, false);//Utilities.getRelativeTopicMaps(this.trainList, false); this.allUsers = Utilities.getAllEntities(this.trainList, false); if (this.bll) { this.bllValues = BLLCalculator.getArtifactMaps(reader, this.trainList, this.testList, false, new ArrayList<Long>(), new ArrayList<Double>(), 0.5, true, null); } /*if (this.features == Features.ENTITIES) { this.resMaps = Utilities.getUsedEntities(this.trainList, true, null); } else if (this.features == Features.TAGS) { this.resMaps = Utilities.getRelativeTagMaps(this.trainList, true); } else { this.resMaps = Utilities.getRelativeTopicMaps(this.trainList, true); }*/ //this.resTopics = Utilities.getUniqueTopicMaps(this.trainList, true); //Utilities.getUsedEntities(this.trainList, true, null); // this.resTopics = Utilities.getUsedEntities(this.trainList, true, null); //this.resTopics = Utilities.getRelativeTagMaps(this.trainList, true); //this.resTopics = Utilities.getRelativeTopicMaps(this.trainList, true); //this.allResources = Utilities.getAllEntities(this.trainList, true); } private Map<Integer, Double> getRankedResourcesList(int userID, boolean sorting) { Map<Integer, Double> rankedResources = new LinkedHashMap<Integer, Double>(); Map<Integer, Double> candidateResources = new LinkedHashMap<Integer, Double>(); // TODO: check if Resources are modeled with BLL values of tags Map<Integer, Double> userResources = null; if (calculateOnTags) { userResources = Bookmark.getResourcesFromUserWithBLL(this.trainList, this.testList, userID, this.bllValues); } else { userResources = Bookmark.getResourcesFromUserWithRec(trainList, testList, userID, 0.5, false); } Map<Integer, Double> targetUserTopics = this.userTopics.get(userID); // get candidates int i = 0; Map<Integer, Double> sortedNeighbors = Utilities.getNeighbors(userID, -1, this.allUsers, this.userMaps, this.trainList, this.sim, true); for (Map.Entry<Integer, Double> neighbor : sortedNeighbors.entrySet()) { if (i++ > MAX_NEIGHBORS) { break; } double userSimVal = neighbor.getValue(); // if (userSimVal != 0.0) { Map<Integer, Double> resources = Bookmark.getResourcesFromUserWithBLL(this.trainList, this.testList, neighbor.getKey(), this.bllValues); // Map<Integer, Double> resources = Bookmark.getResourcesFromUserWithRec(trainList, testList, neighbor.getKey(), 0.5, false); for (Integer resID : resources.keySet()) { if (!userResources.containsKey(resID)) { // exclude already known resources Double val = candidateResources.get(resID); candidateResources.put(resID, (val != null ? val + userSimVal : userSimVal )); } } // } } // rank cadidates for (Map.Entry<Integer, Double> candidateRes : candidateResources.entrySet()) { Map<Integer, Double> candidateTopics = this.resTopics.get(candidateRes.getKey()); double echoVal = 0.0; for (Map.Entry<Integer, Double> userRes : userResources.entrySet()) { Map<Integer, Double> targetTopics = this.resTopics.get(userRes.getKey()); Double resSimVal = Utilities.getCosineFloatSim(targetTopics, candidateTopics); //resSimVal = Math.pow(resSimVal, 3); if (!resSimVal.isNaN() && !resSimVal.isInfinite() && resSimVal.doubleValue() > 0.0) { double bllVal = userRes.getValue().doubleValue() > 0 ? userRes.getValue().doubleValue() : 1.0; double currentEcho = resSimVal * bllVal; //if (this.novelty) { // here? //} echoVal += currentEcho; } } // TODO: use importance of this resource if (this.userSim) { echoVal *= candidateRes.getValue(); } if (this.novelty) { double novValue = 1.0 - Utilities.getCosineFloatSim(targetUserTopics, candidateTopics); echoVal *= novValue; } rankedResources.put(candidateRes.getKey(), echoVal); } /* System.out.print("\"" + this.reader.getUsers().get(userID) + "\";\""); int count = 1; for (int candRes : candidateResources.keySet()) { System.out.print(this.reader.getResources().get(candRes)); if (count++ < candidateResources.keySet().size()) { System.out.print(","); } } System.out.print("\"\n"); */ // denom = 0.0; // // normalize // for (double val : rankedResources.values()) { // denom += Math.exp(val); // } // for (Map.Entry<Integer, Double> entry : rankedResources.entrySet()) { // entry.setValue(Math.exp(entry.getValue()) / denom); // } if (sorting) { // return the sorted resources Map<Integer, Double> sortedRankedResources = new TreeMap<Integer, Double>(new DoubleMapComparator(rankedResources)); sortedRankedResources.putAll(rankedResources); return sortedRankedResources; } else { return rankedResources; } } // Statics ----------------------------------------------------------------------------------------------------------------------------------------------------------- private static List<Map<Integer, Double>> start3LTCreationForResourcesPrediction(BookmarkReader reader, int trainSize, int sampleSize, Features features, boolean userSim, boolean bll, boolean novelty, boolean calculateOnTags) { int size = reader.getBookmarks().size(); CIRTTCalculator calculator = new CIRTTCalculator(reader, trainSize, sampleSize, Similarity.BM25, features, userSim, bll, novelty, calculateOnTags); List<Map<Integer, Double>> results = new ArrayList<Map<Integer, Double>>(); for (Integer userID : reader.getUniqueUserListFromTestSet(trainSize)) { Map<Integer, Double> map = null; map = calculator.getRankedResourcesList(userID, true); results.add(map); } return results; } public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize, int neighborSize, Features features, boolean userSim, boolean bll, boolean novelty, boolean calculateOnTags) { MAX_NEIGHBORS = neighborSize; // read input //filename += "_res"; BookmarkReader reader = new BookmarkReader(trainSize, false); reader.readFile(filename); // get recommendations List<Map<Integer, Double>> cfValues = null; cfValues = start3LTCreationForResourcesPrediction(reader, trainSize, sampleSize, features, userSim, bll, novelty, calculateOnTags); // write results List<int[]> predictionValues = new ArrayList<int[]>(); for (int i = 0; i < cfValues.size(); i++) { Map<Integer, Double> modelVal = cfValues.get(i); predictionValues.add(Ints.toArray(modelVal.keySet())); } String suffix = "_r3l_" + features; if (bll) { suffix += "_bll"; } PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues); writer.writeResourcePredictionsToFile(filename + suffix, trainSize, MAX_NEIGHBORS); return reader; } }