package file.postprocessing;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import common.Bookmark;
import common.DoubleMapComparator;
import common.Utilities;
import file.BookmarkReader;
/**
 * Splits the users of a training set into two groups according to a per-user
 * ratio ("TRR") and answers, per user, whether that user belongs to the group
 * currently selected via {@link #setDescriber(boolean)}.
 *
 * <p>The TRR of a user is computed as the number of keys in that user's map
 * (from {@code Utilities.getUserMaps}) divided by the size of that user's
 * resource set (from {@code Utilities.getUserResourceLists}).
 * NOTE(review): presumably this is a "tag/resource ratio" used to tell
 * categorizers from describers - confirm against the helpers in
 * {@code common.Utilities}.
 *
 * <p>Users are ordered with {@code DoubleMapComparator}; the first half of that
 * ordering is flagged {@code true}, the rest {@code false}. Which end of the
 * TRR scale comes first depends on that comparator, which is not defined in
 * this file.
 */
public class CatDescFiltering {

    private final BookmarkReader reader;
    private final List<Bookmark> trainList;
    private final List<Map<Integer, Integer>> userMaps;
    private final List<Set<Integer>> userResourceLists;
    /** TRR per user id; only users with a TRR strictly greater than zero are stored. */
    private final Map<Integer, Double> trrMap;
    /** Group flag per user id: {@code true} for the first half of the sorted users. */
    private final Map<Integer, Boolean> catDescMap;
    /** TRR found at the split position, or {@code NaN} when no user has a TRR. */
    private final double splitValue;
    /** The group flag that {@link #evaluate(int)} matches users against. */
    private boolean describer;

    /**
     * Builds the user split from the first {@code trainSize} bookmarks of the reader.
     *
     * @param reader    source of bookmarks and users
     * @param trainSize number of leading bookmarks that form the training set
     * @throws IndexOutOfBoundsException if {@code trainSize} is negative or larger
     *                                   than the number of bookmarks in the reader
     */
    public CatDescFiltering(BookmarkReader reader, int trainSize) {
        this.reader = reader;
        this.trainList = this.reader.getBookmarks().subList(0, trainSize);
        this.userMaps = Utilities.getUserMaps(this.trainList);
        this.userResourceLists = Utilities.getUserResourceLists(this.trainList);

        // Collect the TRR of every user that has one.
        // NOTE(review): a minimum-activity filter (reader.getUserCounts().get(i) >= 20)
        // used to be sketched here but was disabled; all users are considered.
        this.trrMap = new LinkedHashMap<Integer, Double>();
        for (int userID = 0; userID < reader.getUsers().size(); userID++) {
            double trr = this.getTRR(userID);
            if (trr > 0.0) {
                this.trrMap.put(userID, trr);
            }
        }

        // Order the users by TRR; direction and tie handling are defined by DoubleMapComparator.
        Map<Integer, Double> sortedTrrMap =
                new TreeMap<Integer, Double>(new DoubleMapComparator(this.trrMap));
        sortedTrrMap.putAll(this.trrMap);

        int splitSize = this.trrMap.size() / 2;
        System.out.println("Split size: " + splitSize);

        // With no users at all there is no split value; previously this raised
        // IndexOutOfBoundsException. The index is also clamped in case the sorted
        // map ended up with fewer entries than trrMap.
        List<Double> trrList = new ArrayList<Double>(sortedTrrMap.values());
        double split = Double.NaN;
        if (!trrList.isEmpty()) {
            split = trrList.get(Math.min(splitSize, trrList.size() - 1));
        }
        this.splitValue = split;
        System.out.println("TRR split value: " + this.splitValue);

        // First splitSize users in sorted order get true, all remaining users get false.
        this.catDescMap = new LinkedHashMap<Integer, Boolean>();
        int position = 0;
        for (Map.Entry<Integer, Double> entry : sortedTrrMap.entrySet()) {
            this.catDescMap.put(entry.getKey(), position < splitSize);
            position++;
        }
    }

    /**
     * Selects which group {@link #evaluate(int)} accepts.
     *
     * @param categorizer the group flag to match; note that, despite the parameter
     *                    name, the value is stored as the "describer" flag as-is
     */
    public void setDescriber(boolean categorizer) {
        this.describer = categorizer;
    }

    /**
     * @return the group flag that {@link #evaluate(int)} currently matches against
     */
    public boolean getDescriber() {
        return this.describer;
    }

    /**
     * Tells whether the given user belongs to the currently selected group.
     *
     * @param userID id of the user to check
     * @return {@code true} if the user was classified and its group flag equals the
     *         selected flag; {@code false} otherwise, including for users that
     *         were never classified
     */
    public boolean evaluate(int userID) {
        Boolean userFlag = this.isDescriber(userID);
        return userFlag != null && userFlag.booleanValue() == this.describer;
    }

    /**
     * Looks up the group flag of a user.
     *
     * @param userID id of the user
     * @return the stored flag, or {@code null} if the user was not classified;
     *         callers must not evaluate such users
     */
    private Boolean isDescriber(int userID) {
        // Only non-null flags are ever stored, so null reliably means "unknown user".
        return this.catDescMap.get(userID);
    }

    /**
     * Tells whether both per-user structures have an entry at the given index.
     */
    private boolean hasUserData(int userID) {
        return userID >= 0
                && userID < this.userMaps.size()
                && userID < this.userResourceLists.size();
    }

    /**
     * Computes the TRR of a user: number of keys in the user's map divided by the
     * number of entries in the user's resource set.
     *
     * @param userID id of the user
     * @return the ratio, or {@code 0.0} if the user has no data or no resources
     */
    private double getTRR(int userID) {
        if (!this.hasUserData(userID)) {
            return 0.0;
        }
        int resourceCount = this.userResourceLists.get(userID).size();
        if (resourceCount == 0) {
            // Avoid NaN/Infinity from a division by zero; such a user has no usable ratio.
            return 0.0;
        }
        return (double) this.userMaps.get(userID).keySet().size() / (double) resourceCount;
    }

    /**
     * Computes {@code Utilities.getMapCount} of the user's map divided by the
     * number of entries in the user's resource set.
     *
     * @param userID id of the user
     * @return the ratio, or {@code 0.0} if the user has no data or no resources
     */
    private double getTPP(int userID) {
        if (!this.hasUserData(userID)) {
            return 0.0;
        }
        int resourceCount = this.userResourceLists.get(userID).size();
        if (resourceCount == 0) {
            return 0.0;
        }
        return Utilities.getMapCount(this.userMaps.get(userID)) / (double) resourceCount;
    }

    /**
     * Computes the share of entries in the user's map whose value is at most
     * {@code ceil(max / 100)}, where {@code max} is the largest value in that map.
     *
     * @param userID id of the user
     * @return the share in the range 0..1, or {@code 0.0} if the user has no
     *         map or the map is empty
     */
    private double getOrphanRatio(int userID) {
        if (userID < 0 || userID >= this.userMaps.size()) {
            return 0.0;
        }
        Map<Integer, Integer> userMap = this.userMaps.get(userID);
        if (userMap.isEmpty()) {
            // Collections.max would throw NoSuchElementException on an empty collection.
            return 0.0;
        }
        int threshold = (int) Math.ceil((double) Collections.max(userMap.values()) / 100.0);
        int count = 0;
        for (int val : userMap.values()) {
            if (val <= threshold) {
                count++;
            }
        }
        return (double) count / (double) userMap.size();
    }

    // Statics -----------------------------------------------------------------------------------------------------------------------

    /**
     * Reads the given file with a new {@code BookmarkReader} and builds a filter from it.
     *
     * @param filename  name handed to {@code BookmarkReader.readFile}
     * @param trainSize number of leading bookmarks that form the training set; also
     *                  passed to the {@code BookmarkReader} constructor
     * @return the filter built from the data that was read
     */
    public static CatDescFiltering instantiate(String filename, int trainSize) {
        BookmarkReader reader = new BookmarkReader(trainSize, false);
        reader.readFile(filename);
        return new CatDescFiltering(reader, trainSize);
    }
}