/*
 TagRecommender:
 A framework to implement and evaluate algorithms for the recommendation
 of tags.
 Copyright (C) 2013 Dominik Kowald

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

package processing;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;
import com.google.common.primitives.Ints;

import common.CalculationType;
import common.CooccurenceMatrix;
import common.DoubleMapComparator;
import common.Bookmark;
import common.MapUtil;
import common.MemoryThread;
import common.PerformanceMeasurement;
import common.Utilities;
import file.PredictionFileWriter;
import file.BookmarkReader;

/**
 * Tag recommender that scores tags per user and per resource from time-decayed
 * activation values and optionally adds an associative component computed by a
 * {@link CooccurenceMatrix}.
 *
 * <p>The per-user and per-resource activation maps are built once in the
 * constructor from the training part of the bookmark list;
 * {@link #getRankedTagList(int, int, boolean, CalculationType)} then blends them
 * for a given user/resource pair.
 */
public class BLLCalculator {

    /** Maximum number of tags returned when a sorted recommendation is requested. */
    private final static int REC_LIMIT = 10;

    private BookmarkReader reader;
    private double dVal;
    /** Weight of the user component; the resource component is weighted with {@code 1 - beta}. */
    private double beta;
    private boolean userBased;
    private boolean resBased;

    private List<Map<Integer, Double>> userMaps;
    private List<Map<Integer, Double>> userCounts;
    private List<Double> userDenoms;
    private List<Long> userTimestamps;
    private List<Map<Integer, Double>> resMaps;
    private List<Map<Integer, Double>> resCounts;
    private List<Double> resDenoms;
    private List<Long> resTimestamps;
    private List<Bookmark> trainList;

    /** Only created when the calculation type passed to the constructor is not {@code NONE}. */
    private CooccurenceMatrix rMatrix;

    /**
     * Builds the user and resource activation maps from the first {@code trainSize}
     * bookmarks of {@code reader}; the remaining bookmarks are used as the test set
     * from which per-user baseline timestamps are looked up.
     *
     * @param reader    source of the bookmarks and tag counts
     * @param trainSize number of leading bookmarks that form the training set
     * @param dVal      decay exponent applied to the recency of each bookmark
     * @param beta      user weight in tenths (the value is divided by 10, so 5 yields 0.5)
     * @param userBased whether the user component contributes to the ranking
     * @param resBased  whether the resource component contributes to the ranking
     * @param cType     calculation type; the co-occurrence matrix is only built when
     *                  this is not {@code CalculationType.NONE}
     * @param lambda    optional exponential cutoff rate for user activations, may be {@code null}
     */
    public BLLCalculator(BookmarkReader reader, int trainSize, double dVal, int beta, boolean userBased, boolean resBased,
            CalculationType cType, Double lambda) {
        this.reader = reader;
        this.dVal = dVal;
        this.beta = (double) beta / 10.0;
        this.userBased = userBased;
        this.resBased = resBased;
        this.trainList = this.reader.getBookmarks().subList(0, trainSize);
        List<Bookmark> testList = this.reader.getBookmarks().subList(trainSize, reader.getBookmarks().size());

        // The maps are built regardless of userBased/resBased because each side's
        // associative component needs the tag counts of the other side.
        this.userDenoms = new ArrayList<Double>();
        this.userTimestamps = new ArrayList<Long>();
        this.userMaps = getArtifactMaps(reader, this.trainList, testList, false, this.userTimestamps, this.userDenoms,
                this.dVal, true, lambda);
        this.userCounts = Utilities.getRelativeTagMaps(this.trainList, false);
        this.resCounts = Utilities.getRelativeTagMaps(this.trainList, true);
        if (cType != CalculationType.NONE) {
            this.rMatrix = new CooccurenceMatrix(this.trainList, reader.getTagCounts(), true);
        }

        this.resDenoms = new ArrayList<Double>();
        this.resTimestamps = new ArrayList<Long>();
        this.resMaps = getArtifactMaps(reader, this.trainList, testList, true, this.resTimestamps, this.resDenoms,
                this.dVal, true, null);
    }

    /**
     * Computes tag scores for a user/resource pair.
     *
     * <p>The user component (weighted with {@code beta}) and the resource component
     * (weighted with {@code 1 - beta}) are each built from the stored activation map
     * and, depending on {@code cType}, the associative values of the co-occurrence
     * matrix. Whenever associative values are added, that component is re-normalized
     * before weighting. Users or resources without stored activations simply
     * contribute nothing.
     *
     * @param userID  index of the user in the user maps
     * @param resID   index of the resource in the resource maps
     * @param sorting if {@code true}, only the {@value #REC_LIMIT} best entries are
     *                returned, in the order defined by {@link DoubleMapComparator}
     * @param cType   selects which associative components are used; the co-occurrence
     *                matrix must have been built (constructor {@code cType} not
     *                {@code NONE}) for any type that uses one
     * @return tag ID to score; never {@code null}
     */
    public Map<Integer, Double> getRankedTagList(int userID, int resID, boolean sorting, CalculationType cType) {
        Map<Integer, Double> resultMap = new LinkedHashMap<Integer, Double>();
        boolean userKnown = this.userMaps != null && userID < this.userMaps.size();
        boolean resKnown = this.resMaps != null && resID < this.resMaps.size();

        if (this.userBased && userKnown) {
            Map<Integer, Double> userResultMap = new LinkedHashMap<Integer, Double>();
            // getArtifactMaps stores null for users without a baseline timestamp.
            Map<Integer, Double> userMap = this.userMaps.get(userID);
            if (userMap != null && !cType.equals(CalculationType.USER_TO_RESOURCE_ONLY)) {
                accumulate(userResultMap, userMap, 1.0);
            }
            boolean userToResource = cType.equals(CalculationType.USER_TO_RESOURCE_ONLY)
                    || cType.equals(CalculationType.USER_TO_RESOURCE)
                    || cType.equals(CalculationType.BOTH);
            if (userToResource && resKnown) {
                Map<Integer, Double> associativeValues = this.rMatrix.calculateAssociativeComponentsWithTagAssosiation(
                        this.userCounts.get(userID), this.resCounts.get(resID), false, true, false);
                accumulate(userResultMap, associativeValues, 1.0);
                normalizeInPlace(userResultMap);
            }
            accumulate(resultMap, userResultMap, this.beta);
        }

        if (this.resBased) {
            Map<Integer, Double> resResultMap = new LinkedHashMap<Integer, Double>();
            if (resKnown) {
                Map<Integer, Double> resMap = this.resMaps.get(resID);
                if (resMap != null && !cType.equals(CalculationType.RESOURCE_TO_USER_ONLY)) {
                    accumulate(resResultMap, resMap, 1.0);
                }
                boolean resourceToUser = cType.equals(CalculationType.RESOURCE_TO_USER_ONLY)
                        || cType.equals(CalculationType.RESOURCE_TO_USER)
                        || cType.equals(CalculationType.BOTH);
                if (resourceToUser && userKnown) {
                    Map<Integer, Double> associativeValues = this.rMatrix.calculateAssociativeComponentsWithTagAssosiation(
                            this.resCounts.get(resID), this.userCounts.get(userID), false, false, true);
                    accumulate(resResultMap, associativeValues, 1.0);
                    normalizeInPlace(resResultMap);
                }
            }
            accumulate(resultMap, resResultMap, 1.0 - this.beta);
        }

        if (!sorting) {
            return resultMap;
        }

        Map<Integer, Double> sortedResultMap = new TreeMap<Integer, Double>(new DoubleMapComparator(resultMap));
        sortedResultMap.putAll(resultMap);
        Map<Integer, Double> returnMap = new LinkedHashMap<Integer, Double>(REC_LIMIT);
        int taken = 0;
        for (Map.Entry<Integer, Double> entry : sortedResultMap.entrySet()) {
            if (taken++ >= REC_LIMIT) {
                break;
            }
            returnMap.put(entry.getKey(), entry.getValue());
        }
        return returnMap;
    }

    /** Adds {@code weight * value} of every entry in {@code source} to the matching entry in {@code target}. */
    private static void accumulate(Map<Integer, Double> target, Map<Integer, Double> source, double weight) {
        for (Map.Entry<Integer, Double> entry : source.entrySet()) {
            double weighted = weight * entry.getValue().doubleValue();
            Double current = target.get(entry.getKey());
            target.put(entry.getKey(), current == null ? weighted : current.doubleValue() + weighted);
        }
    }

    /**
     * Divides every value by the sum of all values, in place.
     * {@code exp(log(v))} is intentionally kept instead of plain {@code v} so that
     * results (including the treatment of non-positive values) stay identical to
     * the previous implementation.
     */
    private static void normalizeInPlace(Map<Integer, Double> values) {
        double denom = 0.0;
        for (Map.Entry<Integer, Double> entry : values.entrySet()) {
            denom += Math.exp(Math.log(entry.getValue()));
        }
        for (Map.Entry<Integer, Double> entry : values.entrySet()) {
            entry.setValue(Math.exp(Math.log(entry.getValue())) / denom);
        }
    }

    /**
     * Builds the base activation values for every user (or resource) that occurs in
     * {@code userLines}.
     *
     * <p>The returned list is indexed by user/resource ID. A new slot is appended
     * whenever an ID is seen that is not yet covered by the list, so IDs are expected
     * to show up in ascending order of first appearance. For users whose baseline
     * timestamp (looked up in {@code testLines}) is {@code -1} the slot holds
     * {@code null}; resources always use a baseline of 1 and a constant activation.
     *
     * @param reader        not used by this method
     * @param userLines     bookmarks to build the activations from
     * @param testLines     bookmarks used to look up each user's baseline timestamp
     * @param resource      {@code true} to build resource maps, {@code false} for user maps
     * @param timestampList output: receives one baseline timestamp per slot; it is also
     *                      read back by ID, so it should be empty on entry
     * @param denomList     output: receives the normalization denominator of every
     *                      non-null map (so its indices do not line up with the
     *                      returned list when null slots exist)
     * @param dVal          decay exponent
     * @param normalize     if {@code true} values are divided by the denominator;
     *                      otherwise the log of the summed activation is stored
     * @param lambda        optional exponential cutoff rate, may be {@code null}
     * @return list of tag ID to activation maps, possibly containing {@code null} entries
     */
    public static List<Map<Integer, Double>> getArtifactMaps(BookmarkReader reader, List<Bookmark> userLines,
            List<Bookmark> testLines, boolean resource, List<Long> timestampList, List<Double> denomList, double dVal,
            boolean normalize, Double lambda) {
        List<Map<Integer, Double>> maps = new ArrayList<Map<Integer, Double>>();
        for (Bookmark data : userLines) {
            int refID;
            if (resource) {
                refID = data.getResourceID();
            } else {
                refID = data.getUserID();
            }

            long baselineTimestamp;
            if (refID >= maps.size()) {
                // first bookmark of this user/resource: determine its baseline and open a slot
                if (resource) {
                    baselineTimestamp = 1;
                } else {
                    baselineTimestamp = Utilities.getBaselineTimestamp(testLines, refID, false);
                }
                timestampList.add(baselineTimestamp);
                if (baselineTimestamp != -1) {
                    maps.add(addActValue(data, new LinkedHashMap<Integer, Double>(), baselineTimestamp, resource, dVal,
                            lambda));
                } else {
                    maps.add(null);
                }
            } else {
                baselineTimestamp = timestampList.get(refID);
                if (baselineTimestamp != -1) {
                    addActValue(data, maps.get(refID), baselineTimestamp, resource, dVal, lambda);
                }
            }
        }

        // convert the summed activations to log scale and, if requested, normalize them
        for (Map<Integer, Double> map : maps) {
            if (map == null) {
                continue;
            }
            double denom = 0.0;
            for (Map.Entry<Integer, Double> entry : map.entrySet()) {
                double actVal = Math.log(entry.getValue());
                denom += Math.exp(actVal);
                entry.setValue(actVal);
            }
            denomList.add(denom);
            if (normalize) {
                for (Map.Entry<Integer, Double> entry : map.entrySet()) {
                    entry.setValue(Math.exp(entry.getValue()) / denom);
                }
            }
        }
        return maps;
    }

    /**
     * Returns the activation map of a single user (or resource), ordered by
     * {@link DoubleMapComparator}.
     *
     * @return the sorted map, or an empty map when {@code userID} is out of range or
     *         no activations were stored for it
     */
    public static Map<Integer, Double> getSortedArtifactMapForUser(int userID, BookmarkReader reader,
            List<Bookmark> userLines, List<Bookmark> testLines, boolean resource, List<Long> timestampList,
            List<Double> denomList, double dVal, boolean normalize) {
        List<Map<Integer, Double>> artifactMaps = getArtifactMaps(reader, userLines, testLines, resource, timestampList,
                denomList, dVal, normalize, null);
        if (userID < artifactMaps.size()) {
            // the slot is null for users without a baseline timestamp
            Map<Integer, Double> artifactMap = artifactMaps.get(userID);
            if (artifactMap != null) {
                Map<Integer, Double> sortedResultMap = new TreeMap<Integer, Double>(new DoubleMapComparator(artifactMap));
                sortedResultMap.putAll(artifactMap);
                return sortedResultMap;
            }
        }
        return new LinkedHashMap<Integer, Double>();
    }

    /**
     * Sums the activation maps of all users (or resources) into one map, ordered by
     * {@link DoubleMapComparator}. Users without stored activations are skipped.
     */
    public static Map<Integer, Double> getCollectiveArtifactMap(BookmarkReader reader, List<Bookmark> userLines,
            List<Bookmark> testLines, boolean resource, List<Long> timestampList, List<Double> denomList, double dVal,
            boolean normalize) {
        Map<Integer, Double> collectiveArtifactMap = new LinkedHashMap<Integer, Double>();
        List<Map<Integer, Double>> artifactMaps = getArtifactMaps(reader, userLines, testLines, resource, timestampList,
                denomList, dVal, normalize, null);
        for (Map<Integer, Double> map : artifactMaps) {
            // getArtifactMaps stores null for users without a baseline timestamp
            if (map != null) {
                accumulate(collectiveArtifactMap, map, 1.0);
            }
        }

        Map<Integer, Double> sortedResultMap = new TreeMap<Integer, Double>(new DoubleMapComparator(collectiveArtifactMap));
        sortedResultMap.putAll(collectiveArtifactMap);
        return sortedResultMap;
    }

    /**
     * Adds the activation contributed by one bookmark to every tag of that bookmark.
     *
     * <p>Resources contribute a constant 1.0 per bookmark. Users contribute
     * {@code recency^(-dVal)}, optionally multiplied by {@code exp(-lambda * recency)},
     * where {@code recency = baselineTimestamp - bookmarkTimestamp + 1}. Bookmarks
     * with an empty timestamp are ignored; a non-finite activation is reported on
     * standard output once per tag and not stored.
     *
     * @return {@code actValues}, for chaining
     */
    private static Map<Integer, Double> addActValue(Bookmark data, Map<Integer, Double> actValues, long baselineTimestamp,
            boolean resource, double dVal, Double lambda) {
        if (data.getTimestamp().isEmpty()) {
            return actValues;
        }

        double newAct;
        if (resource) {
            newAct = 1.0;
        } else {
            double recency = (double) (baselineTimestamp - Long.parseLong(data.getTimestamp()) + 1.0);
            newAct = Math.pow(recency, dVal * -1.0);
            if (lambda != null) {
                double cutoff = Math.exp(recency * lambda.doubleValue() * -1.0);
                newAct *= cutoff;
            }
        }

        boolean usable = !Double.isInfinite(newAct) && !Double.isNaN(newAct);
        for (Integer value : data.getTags()) {
            if (usable) {
                Double oldAct = actValues.get(value);
                actValues.put(value, (oldAct != null ? oldAct + newAct : newAct));
            } else {
                System.out.println("BLL error: " + data.getUserID() + "_" + baselineTimestamp + " " + data.getTimestamp());
            }
        }
        return actValues;
    }

    // Statics -------------------------------------------------------------------------------------------------------------------------------------------------------------------

    /** Accumulated performance report; shared by all calls of the static entry points. */
    private static String timeString;

    /**
     * Trains a calculator on all but the last {@code sampleSize} bookmarks and
     * computes a ranked tag list for each of the remaining ones, recording training
     * and test time in {@link #timeString}. When {@code sampleSize} is 0 the ranking
     * is computed for every bookmark.
     */
    private static List<Map<Integer, Double>> startActCreation(BookmarkReader reader, int sampleSize, boolean sorting,
            boolean userBased, boolean resBased, double dVal, int beta, CalculationType cType, Double lambda) {
        int size = reader.getBookmarks().size();
        int trainSize = size - sampleSize;

        Stopwatch timer = new Stopwatch();
        timer.start();
        BLLCalculator calculator = new BLLCalculator(reader, trainSize, dVal, beta, userBased, resBased, cType, lambda);
        timer.stop();
        long trainingTime = timer.elapsed(TimeUnit.MILLISECONDS);

        List<Map<Integer, Double>> results = new ArrayList<Map<Integer, Double>>();
        if (trainSize == size) {
            // no separate test set: evaluate on the complete data
            trainSize = 0;
        }

        timer.reset();
        timer.start();
        for (int i = trainSize; i < size; i++) { // the test-set
            Bookmark data = reader.getBookmarks().get(i);
            results.add(calculator.getRankedTagList(data.getUserID(), data.getResourceID(), sorting, cType));
        }
        timer.stop();
        long testTime = timer.elapsed(TimeUnit.MILLISECONDS);

        timeString = PerformanceMeasurement.addTimeMeasurement(timeString, true, trainingTime, testTime, sampleSize);
        return results;
    }

    /**
     * Reads {@code filename}, predicts tags for the test bookmarks and writes the
     * predictions as well as a time/memory report.
     *
     * <p>The output name is {@code filename + suffix + "_" + beta + "_" + dVal},
     * where the suffix encodes the configuration ({@code _bll_c}, {@code _bll_r},
     * {@code _bll}, optionally followed by {@code _ac}, or just {@code _ac}). The
     * report is written to {@code ./data/metrics/<output name>_TIME.txt}.
     *
     * @return the reader with its test lines set to the bookmarks from index
     *         {@code trainSize} onwards
     */
    public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize, boolean userBased,
            boolean resBased, double dVal, int beta, CalculationType cType, Double lambda) {
        Timer timerThread = new Timer();
        MemoryThread memoryThread = new MemoryThread();
        timerThread.schedule(memoryThread, 0, MemoryThread.TIME_SPAN);
        try {
            BookmarkReader reader = new BookmarkReader(trainSize, false);
            reader.readFile(filename);

            List<Map<Integer, Double>> actValues = startActCreation(reader, sampleSize, true, userBased, resBased, dVal,
                    beta, cType, lambda);

            List<int[]> predictionValues = new ArrayList<int[]>();
            for (Map<Integer, Double> modelVal : actValues) {
                predictionValues.add(Ints.toArray(modelVal.keySet()));
            }

            String suffix = "_bll_c";
            if (!userBased) {
                suffix = "_bll_r";
            } else if (!resBased) {
                suffix = "_bll";
            }
            if (cType == CalculationType.USER_TO_RESOURCE) {
                suffix += "_ac";
            } else if (cType == CalculationType.USER_TO_RESOURCE_ONLY) {
                suffix = "_ac";
            }

            reader.setTestLines(reader.getBookmarks().subList(trainSize, reader.getBookmarks().size()));
            PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues);
            String outputfile = filename + suffix + "_" + beta + "_" + dVal;
            writer.writeFile(outputfile);

            timeString = PerformanceMeasurement.addMemoryMeasurement(timeString, false, memoryThread.getMaxMemory());
            Utilities.writeStringToFile("./data/metrics/" + outputfile + "_TIME.txt", timeString);
            return reader;
        } finally {
            // The Timer's worker is a non-daemon thread; without cancelling it on
            // failure as well, an exception above would keep the JVM alive.
            timerThread.cancel();
        }
    }
}