PredictionFileReader.java example

Explorer
TagRec-master
- src
/*
 TagRecommender:
 A framework to implement and evaluate algorithms for the recommendation
 of tags.
 Copyright (C) 2013 Dominik Kowald
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.
 
 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package file;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import com.google.common.base.Functions;
import com.google.common.collect.Lists;

import common.Bookmark;
import common.DoubleMapComparator;
import common.PredictionData;
import common.Utilities;
import file.postprocessing.CatDescFiltering;

/**
 * Reads prediction result files from <code>./data/results/&lt;filename&gt;.txt</code>
 * and turns them into {@link PredictionData} entries.
 * <p>
 * The reader accumulates: every successful call to one of the read methods
 * appends to the same internal list, which is only created in the constructor.
 * The list may contain <code>null</code> entries; these mark evaluated lines
 * for which no predictions were available.
 */
public class PredictionFileReader {

	private List<PredictionData> predictions;
	private String filename;
	private int predictionCount;
	
	/**
	 * Creates a reader with an empty prediction list and a prediction count of zero.
	 */
	public PredictionFileReader() {
		this.predictions = new ArrayList<PredictionData>();
		this.predictionCount = 0;
	}
	
	/**
	 * Reads a UTF-8 result file whose lines have the shape
	 * <code>user[-resource]|real1, real2, ...|pred1, pred2, ...</code>.
	 * <p>
	 * A user id that is not an integer is mapped to -1; a missing resource id is
	 * also -1. Lines are skipped when {@link Utilities#isEntityEvaluated} rejects
	 * the user or the resource, or when the (optional) categorizer rejects the user.
	 * Lines with fewer than three <code>|</code>-separated parts, or with an empty
	 * prediction list, are recorded as <code>null</code>.
	 *
	 * @param filename        name of the file below <code>./data/results/</code>, without the <code>.txt</code> extension
	 * @param k               passed through to every created {@link PredictionData}
	 * @param wikiReader      reader handed to {@link Utilities#isEntityEvaluated}
	 * @param minBookmarks    lower user bound handed to {@link Utilities#isEntityEvaluated}
	 * @param maxBookmarks    upper user bound handed to {@link Utilities#isEntityEvaluated}
	 * @param minResBookmarks lower resource bound handed to {@link Utilities#isEntityEvaluated}
	 * @param maxResBookmarks upper resource bound handed to {@link Utilities#isEntityEvaluated}
	 * @param categorizer     optional user filter; may be <code>null</code>
	 * @return <code>true</code> if the file was read completely, <code>false</code> if an exception occurred
	 *         (the stack trace is printed and entries read so far are kept)
	 */
	public boolean readFile(String filename, int k, BookmarkReader wikiReader, Integer minBookmarks, Integer maxBookmarks, Integer minResBookmarks, Integer maxResBookmarks, CatDescFiltering categorizer) {
		this.filename = filename;
		BufferedReader br = null;
		try {
			br = new BufferedReader(new InputStreamReader(new FileInputStream(new File("./data/results/" + filename + ".txt")), "UTF8"));
			String line = null;
			
			while ((line = br.readLine()) != null) {
				String[] lineParts = line.split("\\|");
				String[] parts = lineParts[0].split("-");
				int userID = -1;
				// split("-") yields an empty array when the key consists of separators only
				if (parts.length > 0) {
					try {
						userID = Integer.parseInt(parts[0]);
					} catch (NumberFormatException ignored) {
						// string id - keep -1
					}
				}
				int resID = -1;
				if (parts.length > 1) {
					resID = Integer.parseInt(parts[1]);
				}
				if (!Utilities.isEntityEvaluated(wikiReader, userID, minBookmarks, maxBookmarks, false) || !Utilities.isEntityEvaluated(wikiReader, resID, minResBookmarks, maxResBookmarks, true)) {
					continue; // skip this user if it shouldn't be evaluated - # bookmarks case
				}
				if (categorizer != null && !categorizer.evaluate(userID)) {
					continue; // skip this user if it shouldn't be evaluated - categorizer case
				}
				if (lineParts.length < 3) {
					this.predictions.add(null);
					continue;
				}
				List<String> realData = Arrays.asList(lineParts[1].split(", "));
				List<String> predictionData = Arrays.asList(lineParts[2].split(", "));
				if (predictionData.isEmpty()) {
					// only happens when the prediction part consists of separators only
					this.predictions.add(null);
				} else {
					this.predictions.add(new PredictionData(userID, resID, realData, predictionData, k));
					this.predictionCount++;
				}
			}
			if (k == 10) {
				System.out.println("Number of users to predict: " + this.predictions.size());
			}
			return true;
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// release the file handle on the failure path as well
			closeQuietly(br);
		}
		return false;
	}
	
	/**
	 * Reads a tensor result file whose lines have the shape
	 * <code>user resource tag score</code> (separated by single spaces).
	 * <p>
	 * Consecutive lines sharing the same user/resource pair form one group. The
	 * i-th group is paired with the i-th bookmark after <code>trainSize</code> in
	 * <code>bookmarkReader.getBookmarks()</code>, whose tags serve as the real data.
	 * The predicted tags of a group are ordered through a {@link DoubleMapComparator}
	 * built on the tag-to-score map (presumably by score - confirm against that class).
	 * The last group of the file is emitted once the end of the file is reached.
	 *
	 * @param filename        name of the file below <code>./data/results/</code>, without the <code>.txt</code> extension
	 * @param k               passed through to every created {@link PredictionData}
	 * @param trainSize       index of the first test bookmark
	 * @param bookmarkReader  source of the test bookmarks
	 * @param minBookmarks    currently unused by this method
	 * @param maxBookmarks    currently unused by this method
	 * @param minResBookmarks currently unused by this method
	 * @param maxResBookmarks currently unused by this method
	 * @param categorizer     currently unused by this method
	 * @return <code>true</code> if the file was read completely, <code>false</code> if an exception occurred
	 *         (the stack trace is printed and entries read so far are kept)
	 */
	public boolean readTensorFile(String filename, int k, int trainSize, BookmarkReader bookmarkReader, Integer minBookmarks, Integer maxBookmarks, Integer minResBookmarks, Integer maxResBookmarks, CatDescFiltering categorizer) {
		this.filename = filename;
		List<Bookmark> testLines = bookmarkReader.getBookmarks().subList(trainSize, bookmarkReader.getBookmarks().size());
		
		BufferedReader br = null;
		try {
			br = new BufferedReader(new FileReader(new File("./data/results/" + filename + ".txt")));
			String line = null;
			String userID = null, resID = null;
			Map<Integer, Double> tensorTags = new LinkedHashMap<Integer, Double>();
			int count = 0;
			while ((line = br.readLine()) != null) {
				String[] lineParts = line.split(" ");
				if (userID != null && resID != null && (!userID.equals(lineParts[0]) || !resID.equals(lineParts[1]))) {
					// the user/resource pair changed: the collected group is complete
					addTensorPrediction(userID, resID, testLines.get(count++).getTags(), tensorTags, k);
					tensorTags.clear();
				}
				userID = lineParts[0];
				resID = lineParts[1];
				tensorTags.put(Integer.parseInt(lineParts[2]), Double.parseDouble(lineParts[3]));
			}
			// the final group has no following line that would trigger its emission
			if (userID != null && resID != null && !tensorTags.isEmpty()) {
				addTensorPrediction(userID, resID, testLines.get(count).getTags(), tensorTags, k);
			}
			return true;
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeQuietly(br);
		}
		return false;
	}
	
	/**
	 * Reads a tab-separated result file whose lines have the shape
	 * <code>user\t[item:score,item:score,...]</code>.
	 * <p>
	 * Lines are skipped when the user id is not an integer, when the user is not
	 * in <code>bookmarkReader.getUniqueUserListFromTestSet(trainSize)</code>, when
	 * {@link Utilities#isEntityEvaluated} rejects the user, or when the (optional)
	 * categorizer rejects the user. The real data of a user are the resource names
	 * looked up via <code>bookmarkReader.getResources()</code> for the user's test
	 * resources. Lines without a recommendation column, or whose recommendation
	 * list contains no <code>item:score</code> entry, are recorded as <code>null</code>.
	 * The resource id of every created {@link PredictionData} is -1.
	 *
	 * @param filename        name of the file below <code>./data/results/</code>, without the <code>.txt</code> extension
	 * @param k               passed through to every created {@link PredictionData}
	 * @param trainSize       handed to the test-set lookups of <code>bookmarkReader</code>
	 * @param bookmarkReader  source of test users, their resources and resource names
	 * @param minBookmarks    lower user bound handed to {@link Utilities#isEntityEvaluated}
	 * @param maxBookmarks    upper user bound handed to {@link Utilities#isEntityEvaluated}
	 * @param minResBookmarks currently unused by this method
	 * @param maxResBookmarks currently unused by this method
	 * @param categorizer     optional user filter; may be <code>null</code>
	 * @return <code>true</code> if the file was read completely, <code>false</code> if an exception occurred
	 *         (the stack trace is printed and entries read so far are kept)
	 */
	public boolean readMyMediaLiteFile(String filename, int k, int trainSize, BookmarkReader bookmarkReader, Integer minBookmarks, Integer maxBookmarks, Integer minResBookmarks, Integer maxResBookmarks, CatDescFiltering categorizer) {
		this.filename = filename;
		BufferedReader br = null;
		try {
			List<Integer> testUsers = bookmarkReader.getUniqueUserListFromTestSet(trainSize);
			Map<Integer, List<Integer>> resourcesOfTestUsers = bookmarkReader.getResourcesOfTestUsers(trainSize);
			br = new BufferedReader(new FileReader(new File("./data/results/" + filename + ".txt")));
			String line = null;
			
			while ((line = br.readLine()) != null) {
				String[] lineParts = line.split("\\t");
				if (lineParts.length == 0) {
					continue; // skip invalid line
				}
				
				int userID = -1, resID = -1;
				try {
					userID = Integer.parseInt(lineParts[0]);
				} catch (NumberFormatException ignored) {
					continue; // skip user if userid is invalid
				}
				
				if (!testUsers.contains(userID)) {
					continue; // skip user if it is not part of the test-set
				}
				if (!Utilities.isEntityEvaluated(bookmarkReader, userID, minBookmarks, maxBookmarks, false)) {
					continue; // skip this user if it shouldn't be evaluated - # bookmarks case
				}
				if (categorizer != null && !categorizer.evaluate(userID)) {
					continue; // skip this user if it shouldn't be evaluated - categorizer case
				}
				List<Integer> testResources = resourcesOfTestUsers.get(userID);
				List<String> realData = new ArrayList<String>();
				for (int testRes : testResources) {
					realData.add(bookmarkReader.getResources().get(testRes));
				}
				
				if (lineParts.length > 1) {
					List<String> predictionData = parseMyMediaLiteItems(lineParts[1]);
					if (predictionData.isEmpty()) {
						// e.g. an empty "[]" list: user is evaluated but has no predictions
						this.predictions.add(null);
					} else {
						this.predictions.add(new PredictionData(userID, resID, realData, predictionData, k));
						this.predictionCount++;
					}
				} else {
					// line does not have a recommendation column at all
					this.predictions.add(null);
				}
			}
			if (k == 1) {
				System.out.println("Number of users to predict: " + this.predictions.size());
			}
			return true;
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeQuietly(br);
		}
		return false;
	}
	
	// Helper ------------------------------------------------------------------------------------------------
	/**
	 * Builds one {@link PredictionData} from a finished tensor group and appends it.
	 * The tag ids are copied eagerly, so the caller may clear <code>tensorTags</code> afterwards.
	 */
	private void addTensorPrediction(String userID, String resID, List<Integer> realData, Map<Integer, Double> tensorTags, int k) {
		Map<Integer, Double> sortedTensorTags = new TreeMap<Integer, Double>(new DoubleMapComparator(tensorTags));
		sortedTensorTags.putAll(tensorTags);
		List<Integer> predictionData = new ArrayList<Integer>(sortedTensorTags.keySet());
		
		PredictionData data = new PredictionData(Integer.parseInt(userID), Integer.parseInt(resID),
				Lists.transform(realData, Functions.toStringFunction()), Lists.transform(predictionData, Functions.toStringFunction()), k);
		this.predictions.add(data);
		this.predictionCount++;
	}
	
	/**
	 * Extracts the item ids (the text before the first <code>:</code>) from a
	 * <code>[item:score,item:score,...]</code> string. Entries without a
	 * <code>:</code> - including the single empty entry of <code>[]</code> - are skipped.
	 */
	private static List<String> parseMyMediaLiteItems(String rawRecommendations) {
		List<String> itemIDs = new ArrayList<String>();
		String recommendationString = rawRecommendations.replace("[", "").replace("]", "");
		for (String predictionString : recommendationString.split(",")) {
			int separator = predictionString.indexOf(":");
			if (separator < 0) {
				continue; // no "item:score" pair in this entry
			}
			itemIDs.add(predictionString.substring(0, separator));
		}
		return itemIDs;
	}
	
	/**
	 * Closes the reader if it was opened; a failure while closing is ignored
	 * because the read result has already been determined at that point.
	 */
	private static void closeQuietly(BufferedReader reader) {
		if (reader == null) {
			return;
		}
		try {
			reader.close();
		} catch (java.io.IOException ignored) {
			// nothing sensible left to do
		}
	}
	
	// Getter ------------------------------------------------------------------------------------------------	
	/**
	 * @return the internal (live) list of predictions; may contain <code>null</code> entries
	 */
	public List<PredictionData> getPredictionData() {
		return this.predictions;
	}
	
	/**
	 * @return the filename passed to the most recent read call, or <code>null</code> if none was made
	 */
	public String getFilename() {
		return this.filename;
	}
	
	/**
	 * @return the number of non-null predictions added so far
	 */
	public int getPredictionCount() {
		return this.predictionCount;
	}
}