package processing.analyzing; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.TreeMap; import common.Bookmark; import common.CooccurenceMatrix; import common.Utilities; import file.BookmarkReader; public class TagReuseProbAnalyzer { private BookmarkReader reader = null; private List<Bookmark> trainSet = null; private List<Bookmark> testSet = null; private List<List<Bookmark>> userBookmarks = null; private List<UserTagProperties> userTagProperties = null; private CooccurenceMatrix tagMatrix = null; public TagReuseProbAnalyzer(BookmarkReader reader, int trainSize, boolean context) { this.reader = reader; this.trainSet = reader.getBookmarks().subList(0, trainSize); this.testSet = reader.getBookmarks().subList(trainSize, reader.getBookmarks().size()); this.userTagProperties = new ArrayList<UserTagProperties>(); if (context) { this.tagMatrix = new CooccurenceMatrix(this.trainSet, reader.getTagCounts(), false); } this.userBookmarks = Utilities.getBookmarks(this.trainSet, false); int userID = 0; for (List<Bookmark> currentBookmarks : this.userBookmarks) { if (userID != currentBookmarks.get(0).getUserID()) { System.out.println("ERROR"); } Bookmark testBookmark = Bookmark.getBookmark(this.testSet, userID, -1); if (testBookmark != null) { this.userTagProperties.add(new UserTagProperties(currentBookmarks, testBookmark, this.tagMatrix)); } userID++; } } public void mergeAndWriteUserTagProperties(String filename) { Map<Integer, ReuseProbValue> tagFrequencies = new LinkedHashMap<Integer, ReuseProbValue>(); Map<Integer, ReuseProbValue> tagRecencies = new LinkedHashMap<Integer, ReuseProbValue>(); Map<Integer, ReuseProbValue> tagContextSim = new LinkedHashMap<Integer, ReuseProbValue>(); for (UserTagProperties tagProperties : this.userTagProperties) { for (Map.Entry<Integer, Integer> tagFrequ : tagProperties.getTagCounts().entrySet()) { ReuseProbValue value = tagFrequencies.get(tagFrequ.getValue()); if (value == null) { value = new ReuseProbValue(); tagFrequencies.put(tagFrequ.getValue(), value); } value.increment(tagProperties.getReuseProb().get(tagFrequ.getKey())); } for (Map.Entry<Integer, Integer> tagRec : tagProperties.getTagRecencies().entrySet()) { if (tagRec.getValue() > 0) { ReuseProbValue value = tagRecencies.get(tagRec.getValue()); if (value == null) { value = new ReuseProbValue(); tagRecencies.put(tagRec.getValue(), value); } value.increment(tagProperties.getReuseProb().get(tagRec.getKey())); } } if (this.tagMatrix != null) { for (Map.Entry<Integer, Integer> tagSim : tagProperties.getTagContextSim().entrySet()) { ReuseProbValue value = tagContextSim.get(tagSim.getValue()); if (value == null) { value = new ReuseProbValue(); tagContextSim.put(tagSim.getValue(), value); } value.increment(tagProperties.getReuseProb().get(tagSim.getKey())); } } } // sort and write Map<Integer, ReuseProbValue> sortedTagFrequencies = new TreeMap<Integer, ReuseProbValue>(tagFrequencies); Map<Integer, ReuseProbValue> sortedTagRecencies = new TreeMap<Integer, ReuseProbValue>(tagRecencies); writeMap(filename + "_Frequency", sortedTagFrequencies, false, false); writeMap(filename + "_Recency", sortedTagRecencies, true, false); writeMap(filename + "_Recency_power", sortedTagRecencies, true, true); if (this.tagMatrix != null) { Map<Integer, ReuseProbValue> sortedTagContextSim = new TreeMap<Integer, ReuseProbValue>(tagContextSim); writeMap(filename + "_ContextSim", sortedTagContextSim, false, false); } } private void writeMap(String filename, Map<Integer, ReuseProbValue> map, boolean normalize, boolean powerlaw) { try { FileWriter writer = new FileWriter(new File("./data/csv/" + filename + ".txt")); BufferedWriter bw = new BufferedWriter(writer); double normVal = -1; if (normalize) { normVal = getMapNormalizeValue(map); } for (Map.Entry<Integer, ReuseProbValue> entry : map.entrySet()) { double sum = entry.getValue().getSum(); double count = entry.getValue().getCount(); if (sum > 0.0) { if (normalize) { if (powerlaw) { for (double i = 1.0; i < count; i++) { bw.write(entry.getKey() + "\n"); } } else { bw.write(entry.getKey() + ";" + sum / normVal + "\n"); } } else { bw.write(entry.getKey() + ";" + sum / count + "\n"); } } } bw.close(); } catch (IOException e) { e.printStackTrace(); } } private double getMapNormalizeValue(Map<Integer, ReuseProbValue> map) { double sum = 0.0; for (Map.Entry<Integer, ReuseProbValue> entry : map.entrySet()) { if (entry.getValue().getSum() > sum) { //sum = entry.getValue().getSum(); if (entry.getValue().getSum() > sum) { sum = entry.getValue().getSum(); } } } return sum; } // Statics public static void analyzeSample(String filename, int trainSize, int sampleSize, boolean context) { BookmarkReader reader = new BookmarkReader(trainSize, false); reader.readFile(filename); TagReuseProbAnalyzer analyzer = new TagReuseProbAnalyzer(reader, trainSize, context); analyzer.mergeAndWriteUserTagProperties(filename); } }