/* TagRecommender: A framework to implement and evaluate algorithms for the recommendation of tags. Copyright (C) 2013 Dominik Kowald This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ package processing; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import com.google.common.primitives.Ints; import file.PredictionFileWriter; import file.BookmarkReader; import common.Bookmark; public class ContentBasedCalculator { private final static int REC_LIMIT = 10; private BookmarkReader reader; private List<Bookmark> trainList; public ContentBasedCalculator(BookmarkReader reader, int trainSize) { this.reader = reader; // TODO: use this data for recommendations this.trainList = this.reader.getBookmarks().subList(0, trainSize); } public Map<Integer, Double> getRankedTagList(int userID, int resID) { Map<Integer, Double> resultMap = new LinkedHashMap<Integer, Double>(); // TODO: calculate your recommendations here and return the top-10 (=REC_LIMIT) tags with probability value // have also a look on the other calculator classes! // TODO: in order to improve your content-based recommender, you can merge your results with other approaches like the ones from the LanguageModelCalculator or ActCalculator return resultMap; } // --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- public static List<Map<Integer, Double>> startContentBasedCreation(BookmarkReader reader, int sampleSize) { int size = reader.getBookmarks().size(); int trainSize = size - sampleSize; ContentBasedCalculator calculator = new ContentBasedCalculator(reader, trainSize); List<Map<Integer, Double>> results = new ArrayList<Map<Integer, Double>>(); if (trainSize == size) { trainSize = 0; } for (int i = trainSize; i < size; i++) { // the test-set Bookmark data = reader.getBookmarks().get(i); Map<Integer, Double> map = calculator.getRankedTagList(data.getUserID(), data.getResourceID()); results.add(map); } return results; } public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize) { BookmarkReader reader = new BookmarkReader(trainSize, false); reader.readFile(filename); List<Map<Integer, Double>> modelValues = startContentBasedCreation(reader, sampleSize); List<int[]> predictionValues = new ArrayList<int[]>(); for (int i = 0; i < modelValues.size(); i++) { Map<Integer, Double> modelVal = modelValues.get(i); predictionValues.add(Ints.toArray(modelVal.keySet())); } String suffix = "_cb"; reader.setTestLines(reader.getBookmarks().subList(trainSize, reader.getBookmarks().size())); PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues); String outputFile = filename + suffix; writer.writeFile(outputFile); return reader; } }