/*
 TagRecommender:
 A framework to implement and evaluate algorithms for the recommendation
 of tags.
 Copyright (C) 2013 Dominik Kowald

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
package file;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import com.google.common.base.Functions;
import com.google.common.collect.Lists;

import common.Bookmark;
import common.DoubleMapComparator;
import common.PredictionData;
import common.Utilities;
import file.postprocessing.CatDescFiltering;

/**
 * Reads prediction result files from {@code ./data/results/<filename>.txt} and
 * converts every evaluated line into a {@link PredictionData} entry.
 *
 * <p>Three input formats are supported, one per {@code read*} method. All of
 * them append to the same internal list, so calling several readers on one
 * instance accumulates their results. The list may contain {@code null}
 * placeholders for lines that carried no usable predictions;
 * {@link #getPredictionCount()} counts only the non-null entries.
 */
public class PredictionFileReader {

	private final List<PredictionData> predictions;
	private String filename;
	private int predictionCount;

	public PredictionFileReader() {
		this.predictions = new ArrayList<PredictionData>();
		this.predictionCount = 0;
	}

	/**
	 * Reads a result file whose lines have the form
	 * {@code userID[-resID]|real1, real2, ...|pred1, pred2, ...}.
	 *
	 * <p>A user id that is not an integer is mapped to {@code -1}; a missing
	 * resource id is also {@code -1}. Lines are skipped when
	 * {@code Utilities.isEntityEvaluated} rejects the user or the resource, or
	 * when the optional categorizer rejects the user. Lines with fewer than
	 * three {@code |}-separated fields, or with an empty prediction list, add a
	 * {@code null} placeholder.
	 *
	 * @param filename        name of the file below {@code ./data/results/}, without the {@code .txt} extension
	 * @param k               passed through to every created {@link PredictionData}
	 * @param wikiReader      bookmark reader handed to {@code Utilities.isEntityEvaluated}
	 * @param minBookmarks    lower user bound handed to {@code Utilities.isEntityEvaluated}
	 * @param maxBookmarks    upper user bound handed to {@code Utilities.isEntityEvaluated}
	 * @param minResBookmarks lower resource bound handed to {@code Utilities.isEntityEvaluated}
	 * @param maxResBookmarks upper resource bound handed to {@code Utilities.isEntityEvaluated}
	 * @param categorizer     optional additional user filter; may be {@code null}
	 * @return {@code true} if the whole file was read, {@code false} if an exception occurred
	 *         (entries read before the failure stay in the list)
	 */
	public boolean readFile(String filename, int k, BookmarkReader wikiReader, Integer minBookmarks, Integer maxBookmarks, Integer minResBookmarks, Integer maxResBookmarks, CatDescFiltering categorizer) {
		BufferedReader br = null;
		try {
			this.filename = filename;
			InputStreamReader reader = new InputStreamReader(new FileInputStream(new File("./data/results/" + filename + ".txt")), "UTF8");
			br = new BufferedReader(reader);
			String line = null;

			while ((line = br.readLine()) != null) {
				String[] lineParts = line.split("\\|");
				String[] parts = lineParts[0].split("-");
				int userID = -1;
				try {
					userID = Integer.parseInt(parts[0]);
				} catch (Exception ignored) {
					// string id - keep -1
				}
				int resID = -1;
				if (parts.length > 1) {
					resID = Integer.parseInt(parts[1]);
				}
				if (!Utilities.isEntityEvaluated(wikiReader, userID, minBookmarks, maxBookmarks, false)
						|| !Utilities.isEntityEvaluated(wikiReader, resID, minResBookmarks, maxResBookmarks, true)) {
					continue; // skip this user if it shouldn't be evaluated - # bookmarks case
				}
				if (categorizer != null && !categorizer.evaluate(userID)) {
					continue; // skip this user if it shouldn't be evaluated - categorizer case
				}

				if (lineParts.length < 3) {
					// line carries no prediction field at all
					this.predictions.add(null);
					continue;
				}
				List<String> realData = Arrays.asList(lineParts[1].split(", "));
				List<String> predictionData = Arrays.asList(lineParts[2].split(", "));
				if (predictionData.size() > 0) {
					PredictionData data = new PredictionData(userID, resID, realData, predictionData, k);
					this.predictions.add(data);
					this.predictionCount++;
				} else {
					// prediction field present but empty
					this.predictions.add(null);
				}
			}
			if (k == 10) {
				System.out.println("Number of users to predict: " + this.predictions.size());
			}
			return true;
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// close on every path, not only after a successful read
			closeQuietly(br);
		}
		return false;
	}

	/**
	 * Reads a tensor-factorization result file whose lines have the form
	 * {@code userID resID tagID score} (space separated). Consecutive lines
	 * sharing the same user and resource form one group; every group becomes one
	 * {@link PredictionData} whose predicted tags are ordered by
	 * {@link DoubleMapComparator} and whose real tags are taken from the
	 * matching bookmark of the test split (groups and test bookmarks are paired
	 * by position).
	 *
	 * <p>Lines with fewer than four fields (e.g. a trailing blank line) are
	 * skipped. The last group of the file is emitted as well.
	 *
	 * @param filename        name of the file below {@code ./data/results/}, without the {@code .txt} extension
	 * @param k               passed through to every created {@link PredictionData}
	 * @param trainSize       index of the first test bookmark in {@code bookmarkReader.getBookmarks()}
	 * @param bookmarkReader  source of the test bookmarks
	 * @param minBookmarks    currently not used by this method
	 * @param maxBookmarks    currently not used by this method
	 * @param minResBookmarks currently not used by this method
	 * @param maxResBookmarks currently not used by this method
	 * @param categorizer     currently not used by this method
	 * @return {@code true} if the whole file was read, {@code false} if an exception occurred
	 *         (entries read before the failure stay in the list)
	 */
	public boolean readTensorFile(String filename, int k, int trainSize, BookmarkReader bookmarkReader, Integer minBookmarks, Integer maxBookmarks, Integer minResBookmarks, Integer maxResBookmarks, CatDescFiltering categorizer) {
		this.filename = filename;
		List<Bookmark> testLines = bookmarkReader.getBookmarks().subList(trainSize, bookmarkReader.getBookmarks().size());

		BufferedReader br = null;
		try {
			br = new BufferedReader(new FileReader(new File("./data/results/" + filename + ".txt")));
			String line = null;
			String userID = null, resID = null;
			Map<Integer, Double> tensorTags = new LinkedHashMap<Integer, Double>();
			int count = 0;

			while ((line = br.readLine()) != null) {
				String[] lineParts = line.split(" ");
				if (lineParts.length < 4) {
					continue; // skip blank or malformed line
				}
				boolean groupChanged = userID != null && resID != null
						&& (!userID.equals(lineParts[0]) || !resID.equals(lineParts[1]));
				if (groupChanged) {
					// the previous user/resource group is complete
					addTensorPrediction(userID, resID, testLines.get(count++).getTags(), tensorTags, k);
					tensorTags.clear();
				}
				userID = lineParts[0];
				resID = lineParts[1];
				tensorTags.put(Integer.parseInt(lineParts[2]), Double.parseDouble(lineParts[3]));
			}
			// the last group has no following line that would trigger its emission
			if (userID != null && resID != null && !tensorTags.isEmpty()) {
				addTensorPrediction(userID, resID, testLines.get(count).getTags(), tensorTags, k);
			}
			return true;
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeQuietly(br);
		}
		return false;
	}

	/**
	 * Reads a MyMediaLite-style result file whose lines have the form
	 * {@code userID<TAB>[item:score,item:score,...]}.
	 *
	 * <p>Lines are skipped when the user id is not an integer, when the user is
	 * not contained in the test set, when {@code Utilities.isEntityEvaluated}
	 * rejects the user, or when the optional categorizer rejects the user. The
	 * real data of a user are the resources returned by
	 * {@code bookmarkReader.getResourcesOfTestUsers(trainSize)}. List entries
	 * without a {@code :} separator are ignored; a line without any usable
	 * prediction adds a {@code null} placeholder. The resource id of every
	 * created {@link PredictionData} is {@code -1}.
	 *
	 * @param filename        name of the file below {@code ./data/results/}, without the {@code .txt} extension
	 * @param k               passed through to every created {@link PredictionData}
	 * @param trainSize       size of the training split, handed to the bookmark reader
	 * @param bookmarkReader  source of test users and their resources
	 * @param minBookmarks    lower user bound handed to {@code Utilities.isEntityEvaluated}
	 * @param maxBookmarks    upper user bound handed to {@code Utilities.isEntityEvaluated}
	 * @param minResBookmarks currently not used by this method
	 * @param maxResBookmarks currently not used by this method
	 * @param categorizer     optional additional user filter; may be {@code null}
	 * @return {@code true} if the whole file was read, {@code false} if an exception occurred
	 *         (entries read before the failure stay in the list)
	 */
	public boolean readMyMediaLiteFile(String filename, int k, int trainSize, BookmarkReader bookmarkReader, Integer minBookmarks, Integer maxBookmarks, Integer minResBookmarks, Integer maxResBookmarks, CatDescFiltering categorizer) {
		BufferedReader br = null;
		try {
			this.filename = filename;
			List<Integer> testUsers = bookmarkReader.getUniqueUserListFromTestSet(trainSize);
			Map<Integer, List<Integer>> resourcesOfTestUsers = bookmarkReader.getResourcesOfTestUsers(trainSize);
			br = new BufferedReader(new FileReader(new File("./data/results/" + filename + ".txt")));
			String line = null;

			while ((line = br.readLine()) != null) {
				String[] lineParts = line.split("\\t");
				if (lineParts.length == 0) {
					continue; // skip invalid line
				}
				int userID = -1, resID = -1;
				try {
					userID = Integer.parseInt(lineParts[0]);
				} catch (Exception ignored) {
					continue; // skip user if userid is invalid
				}
				if (!testUsers.contains(userID)) {
					continue; // skip user if it is not part of the test-set
				}
				if (!Utilities.isEntityEvaluated(bookmarkReader, userID, minBookmarks, maxBookmarks, false)) {
					continue; // skip this user if it shouldn't be evaluated - # bookmarks case
				}
				if (categorizer != null && !categorizer.evaluate(userID)) {
					continue; // skip this user if it shouldn't be evaluated - categorizer case
				}

				List<Integer> testResources = resourcesOfTestUsers.get(userID);
				List<String> realData = new ArrayList<String>();
				for (int testRes : testResources) {
					realData.add(bookmarkReader.getResources().get(testRes));
				}

				List<String> predictionData = new ArrayList<String>();
				if (lineParts.length > 1) {
					String recommendationString = lineParts[1].replace("[", "").replace("]", "");
					for (String predictionString : recommendationString.split(",")) {
						int separatorIndex = predictionString.indexOf(":");
						// an empty list "[]" yields one entry without ':'; ignore such entries
						// instead of failing on substring(0, -1)
						if (separatorIndex >= 0) {
							predictionData.add(predictionString.substring(0, separatorIndex));
						}
					}
				}
				if (!predictionData.isEmpty()) {
					PredictionData data = new PredictionData(userID, resID, realData, predictionData, k);
					this.predictions.add(data);
					this.predictionCount++;
				} else {
					// line does not have predictions
					this.predictions.add(null);
				}
			}
			if (k == 1) {
				System.out.println("Number of users to predict: " + this.predictions.size());
			}
			return true;
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeQuietly(br);
		}
		return false;
	}

	// Helpers -----------------------------------------------------------------------------------------------

	/**
	 * Turns one completed user/resource group of a tensor file into a
	 * {@link PredictionData} and appends it to the result list.
	 */
	private void addTensorPrediction(String userID, String resID, List<Integer> realData, Map<Integer, Double> tensorTags, int k) {
		Map<Integer, Double> sortedTensorTags = new TreeMap<Integer, Double>(new DoubleMapComparator(tensorTags));
		sortedTensorTags.putAll(tensorTags);
		// copy the keys eagerly: the caller clears tensorTags afterwards
		List<Integer> predictionData = new ArrayList<Integer>(sortedTensorTags.keySet());
		PredictionData data = new PredictionData(Integer.parseInt(userID), Integer.parseInt(resID),
				Lists.transform(realData, Functions.toStringFunction()),
				Lists.transform(predictionData, Functions.toStringFunction()), k);
		this.predictions.add(data);
		this.predictionCount++;
	}

	/** Closes the reader if it was opened; a failure while closing is only logged. */
	private static void closeQuietly(BufferedReader reader) {
		if (reader == null) {
			return;
		}
		try {
			reader.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	// Getter ------------------------------------------------------------------------------------------------

	/** @return the live list of read predictions; may contain {@code null} placeholders */
	public List<PredictionData> getPredictionData() {
		return this.predictions;
	}

	/** @return the filename passed to the most recent {@code read*} call, or {@code null} if none was made */
	public String getFilename() {
		return this.filename;
	}

	/** @return the number of non-null prediction entries read so far */
	public int getPredictionCount() {
		return this.predictionCount;
	}
}