/* TagRecommender: A framework to implement and evaluate algorithms for the recommendation of tags. Copyright (C) 2013 Dominik Kowald This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ package processing; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Timer; import java.util.TreeMap; import java.util.concurrent.TimeUnit; import com.google.common.base.Stopwatch; import com.google.common.primitives.Ints; import common.IntMapComparator; import common.Bookmark; import common.MemoryThread; import common.PerformanceMeasurement; import common.Utilities; import file.PredictionFileWriter; import file.BookmarkReader; public class MPCalculator { private static String timeString; private static List<int[]> getPerfectTags(BookmarkReader reader, int sampleSize, int limit) { List<int[]> tags = new ArrayList<int[]>(); int trainSize = reader.getBookmarks().size() - sampleSize; for (Bookmark data : reader.getBookmarks().subList(trainSize, trainSize + sampleSize)) { List<Integer> t = new ArrayList<Integer>(data.getTags()); //while (t.size() < limit) { // t.add(-1); //} tags.add(Ints.toArray(t)); } return tags; } private static int[] getPopularTagList(BookmarkReader reader, int size) { Map<Integer, Integer> countMap = new LinkedHashMap<Integer, Integer>(); for (int i = 0; i < reader.getTagCounts().size(); i++) { countMap.put(i, reader.getTagCounts().get(i)); } Map<Integer, Integer> sortedCountMap = new TreeMap<Integer, Integer>(new IntMapComparator(countMap)); sortedCountMap.putAll(countMap); int[] tagIDs = new int[size]; int i = 0; for (Integer key : sortedCountMap.keySet()) { if (i < size) { tagIDs[i++] = key; } else { break; } } return tagIDs; } private static List<int[]> getPopularTags(BookmarkReader reader, int sampleSize, int limit) { List<int[]> tags = new ArrayList<int[]>(); Stopwatch timer = new Stopwatch(); timer.start(); int[] tagIDs = getPopularTagList(reader, limit); timer.stop(); long trainingTime = timer.elapsed(TimeUnit.MILLISECONDS); timer.reset(); timer.start(); for (int j = 0; j < sampleSize; j++) { tags.add(tagIDs); } timer.stop(); long testTime = timer.elapsed(TimeUnit.MILLISECONDS); timeString = PerformanceMeasurement.addTimeMeasurement(timeString, true, trainingTime, testTime, sampleSize); return tags; } // public statics -------------------------------------------------------------------------------------------- public static BookmarkReader predictPopularTags(String filename, int trainSize, int sampleSize, boolean mp) { Timer timerThread = new Timer(); MemoryThread memoryThread = new MemoryThread(); timerThread.schedule(memoryThread, 0, MemoryThread.TIME_SPAN); BookmarkReader reader = new BookmarkReader(trainSize, false); reader.readFile(filename); List<int[]> values = null; if (mp) { values = getPopularTags(reader, sampleSize, 10); } else { values = getPerfectTags(reader, sampleSize, 10); } reader.setTestLines(reader.getBookmarks().subList(trainSize, reader.getBookmarks().size())); PredictionFileWriter writer = new PredictionFileWriter(reader, values); writer.writeFile(filename + "_mp"); timeString = PerformanceMeasurement.addMemoryMeasurement(timeString, false, memoryThread.getMaxMemory()); timerThread.cancel(); Utilities.writeStringToFile("./data/metrics/" + filename + "_mp" + "_TIME.txt", timeString); return reader; } }