package itemrecommendations; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Timer; import java.util.Map.Entry; import java.util.concurrent.TimeUnit; import java.util.Set; import java.util.TreeMap; import common.Bookmark; import common.DoubleMapComparator; import common.Features; import common.MemoryThread; import common.PerformanceMeasurement; import common.Similarity; import common.Utilities; import file.BookmarkReader; import file.PredictionFileWriter; import javax.vecmath.*; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import com.google.common.base.Stopwatch; // TODO: integrate time and memory consumption test public class SustainCalculator { //Define parameters: potentiell veraenderbar //r=2 #2 //beta=1 # 2 //n=0.7 # 0.6 //tau_cluster=0.7 #0.9 BookmarkReader reader; List<Integer> user; private int numberOfTopics; private List<Bookmark> trainList; private double lambda; private CFResourceCalculator rankedResourseCalculator; private Stopwatch timer; private Timer timerThread; private MemoryThread memoryThread; private String timeString; //listId = userId, Set= resourceIds //private List<Set<Integer>> userResourceTrainList; // listId = resourceId; mapKey = topicId; mapValue = count private List<Map<Integer, Integer>> resTopicTrainList; private Map<Integer, ArrayList<GVector>> userClusterList; private Map<Integer, GVector> userLambdaList; private String sampleName; private int trainSize; private List<Integer> uniqueUserList; private Map<Integer, List<Integer>> resourceListPerUser; public SustainCalculator(String sampleName, int trainSize){ this.timerThread = new Timer(); this.memoryThread = new MemoryThread(); this.timerThread.schedule(this.memoryThread, 0, MemoryThread.TIME_SPAN); this.trainSize = trainSize; this.sampleName = sampleName; this.reader = new BookmarkReader(trainSize, false); this.reader.readFile(sampleName); this.trainList = this.reader.getBookmarks().subList(0, trainSize); // this.testList = this.reader.getBookmarks().subList(trainSize, trainSize + testSize); this.timer = new Stopwatch(); this.timer.start(); rankedResourseCalculator = new CFResourceCalculator(this.reader, this.trainSize, false, true, false, 5, Similarity.COSINE, Features.ENTITIES); this.numberOfTopics = this.reader.getCategories().size(); //go through all users - matrix user-resource // Set is ordered per user? TODO: ask Dominik, Set can not be ordered linkedHashSet can. Is there a method to get sorted resources? //this.userResourceTrainList = Utilities.getUserResourceLists(this.trainList); //this.userResourceTestList = Utilities.getUserResourceLists(this.testList); //go through all unique resources - Erstelle resourcen topic matrix this.resTopicTrainList = Utilities.getResTopics(this.trainList); //this.resTopicTestList = Utilities.getResTopics(this.testList); this.uniqueUserList = reader.getUniqueUserListFromTestSet(trainSize); //saves Cluster per user this.userClusterList = new HashMap<Integer, ArrayList<GVector>>(); //saves lambda per user this.userLambdaList = new HashMap<Integer, GVector>(); //TODO: check, is this necessary //this.reader.setUserLines(reader.getBookmarks().subList(trainSize, trainSize + testSize)); } public BookmarkReader predictResources(double r, double tau, double beta, double learningRate, int trainingRecency, int candidateNumber, int sampleSize, double cfWeight) { // for every user for (Integer userId : this.uniqueUserList) { //TODO: pass the last 5 items List<Integer> resourceList = Bookmark.getResourcesFromUser(this.trainList, userId); if (resourceList.size()>=trainingRecency && trainingRecency!=0) resourceList = resourceList.subList(resourceList.size()-trainingRecency, resourceList.size()); train(userId, resourceList, r, tau, learningRate, beta); } this.writeUserLambdas(this.sampleName); timer.stop(); long trainingTime = timer.elapsed(TimeUnit.MILLISECONDS); timer.reset(); timer.start(); LinkedList<int[]> sortedResourcesPerUser = new LinkedList<int[]>(); for (Integer userId : this.uniqueUserList) { if (userId%100 ==0) System.out.println("user "+userId+" of "+this.uniqueUserList.size()); sortedResourcesPerUser.add(predict(userId, r, tau, learningRate, beta, candidateNumber, sampleSize, cfWeight)); } timer.stop(); long testTime = timer.elapsed(TimeUnit.MILLISECONDS); timeString = PerformanceMeasurement.addTimeMeasurement(timeString, true, trainingTime, testTime, sampleSize); PredictionFileWriter writer = new PredictionFileWriter(reader, sortedResourcesPerUser); String outputFile = this.sampleName; writer.writeResourcePredictionsToFile(outputFile + "_sustain", this.trainSize, 0); this.timeString = PerformanceMeasurement.addMemoryMeasurement(this.timeString, false, this.memoryThread.getMaxMemory()); this.timerThread.cancel(); Utilities.writeStringToFile("./data/metrics/" + outputFile + "_sustain_TIME.txt", this.timeString); return this.reader; } private boolean writeUserLambdas(String filename) { //List<String> resourceList = this.reader.getResources(); //Map<Integer, List<Integer>> resourcesOfTestUsers = this.reader.getResourcesOfTestUsers(trainSize); try { FileWriter writer = new FileWriter(new File("./data/metrics/" + filename + "_lambdas.txt")); BufferedWriter bw = new BufferedWriter(writer); for (Entry<Integer, GVector> entry : this.userLambdaList.entrySet()) { //String resultString = (this.reader.getUsers().get(userID) + "-XYZ|"); String resultString = entry.getKey() + "| "; String resultingLambdas = ""; for (int c=0; c<entry.getValue().getSize(); c++) { resultingLambdas += entry.getValue().getElement(c)+", "; } if (resultingLambdas != "") { resultingLambdas = resultingLambdas.substring(0, resultingLambdas.length() - 2); } resultString += resultingLambdas+"\n"; bw.write(resultString); } bw.flush(); bw.close(); writer.close(); return true; } catch (IOException e) { e.printStackTrace(); } return false; } private void train(int userId, List<Integer> list, double r, double tau, double learningRate, double beta){ //LinkedList<Integer> topics = new LinkedList<Integer>(); ArrayList<GVector> clusterList = new ArrayList<GVector>(); double[] array = new double[this.numberOfTopics]; Arrays.fill(array,1); GVector lambda = new GVector(array); //clusterList.add(c0); //GVector bestCluster = new GVector(0); for (Integer resource : list){ Set<Integer> topics = this.resTopicTrainList.get(resource).keySet(); // Vector, write 1 for every existing topic GVector currentResource = new GVector(this.numberOfTopics); currentResource.zero(); for (Integer t : topics) currentResource.setElement(t, 1); // create the first cluster if (clusterList.size()==0){ clusterList.add(currentResource); continue; } double maxActivation = 0; GVector bestCluster = new GVector(0); GVector minDistance = new GVector(this.numberOfTopics); minDistance.zero(); Double totalActivation = 0.0; int index = 0; int bestIndex=0; for (GVector c : clusterList){ Pair<Double, GVector> activationPair = this.calculateActivation(currentResource, c, lambda, r); if (activationPair.getLeft()>maxActivation){ bestCluster = c; minDistance= new GVector(activationPair.getRight()); maxActivation= activationPair.getLeft(); bestIndex = index; } totalActivation += activationPair.getLeft(); index++; } // System.out.println("test"); // equation 6 Hemmung maxActivation = Math.pow(maxActivation, beta)/Math.pow(totalActivation, beta)*maxActivation; if (maxActivation<=tau){ // input forms a new cluster bestCluster = currentResource; //clusterList.add(bestCluster); clusterList.add(index, bestCluster); bestIndex = index; } GVector deltaLambda = new GVector(lambda.getSize()); // equation 13 for (int i =0; i<lambda.getSize(); i++){ double elementProduct = lambda.getElement(i)*minDistance.getElement(i); deltaLambda.setElement(i, learningRate*Math.exp(-elementProduct)*(1-elementProduct)); } //GVector.add = adds the two vectors elements lambda.add(deltaLambda); // equation 12 GVector deltaBestCluster = new GVector(bestCluster.getSize()); // delta_winCluster <- n*(I-Cluster[WinCluster,]) # eq 12 deltaBestCluster.sub(currentResource,deltaBestCluster); deltaBestCluster.scale(learningRate); //??? why adding the cluster? bestCluster.add(deltaBestCluster); clusterList.set(bestIndex, deltaBestCluster); } if (clusterList.size()>3) System.out.println(clusterList.size()+"cluster for user"+userId); this.userLambdaList.put(userId, lambda); this.userClusterList.put(userId, clusterList); } private Pair<Double, GVector> calculateActivation(GVector input, GVector cluster, GVector lambda, double r){ // Calculate distance for every cluster # eq 4 GVector distance = new GVector(input.getSize()); distance.sub(input, cluster); for (int i =0; i<distance.getSize(); i++){ // * 0.5 is removed, since we do not map 2 values for each topic, but only one distance.setElement(i, Math.abs(distance.getElement(i))); } double numerator=0; double denom=0; // Calculate cluster activation # eq 5 for (int i =0; i<lambda.getSize(); i++){ double lambdaR = Math.pow(lambda.getElement(i), r); denom = denom+lambdaR; numerator= numerator+lambdaR*Math.exp((-lambda.getElement(i)*distance.getElement(i))); } return new ImmutablePair<Double, GVector>((numerator/denom), distance); } private int[] predict(int userId, double r, double tau, double learningRate, double beta, int candidateNumber, int sampleSize, double cfWeight){ Map<Integer, Double> resourceActivationMap = new HashMap<Integer, Double>(); if (candidateNumber>0){ Map<Integer, Double> candidateSet = this.rankedResourseCalculator.getRankedResourcesList(userId, -1, true, false, false, true, false); //TreeMap<Integer, Double> candidateSet = this.calculateCandidateSet(userId); Map<Integer, Double> CFValues = new HashMap<Integer, Double>(); int count = 0; for (Map.Entry<Integer, Double> resource : candidateSet.entrySet()){ if (count == candidateNumber) break; double resourceActivation = this.calculateResourceActivations(userId, resource.getKey(), beta, r); resourceActivationMap.put(resource.getKey(), resourceActivation); CFValues.put(resource.getKey(), resource.getValue()); count++; } //resourceActivationMap = this.calculateNormalizedValues(resourceActivationMap); //CFValues = this.calculateNormalizedValues(CFValues); for ( Entry<Integer, Double> entry : resourceActivationMap.entrySet()){ double activation = entry.getValue()*(1-cfWeight)+CFValues.get(entry.getKey())* cfWeight; resourceActivationMap.put(entry.getKey(), activation); } } else{ Map<Integer, Double> candidateSet = this.rankedResourseCalculator.getRankedResourcesList(userId, -1, true, false, false, true, false); for (int resource =0; resource< this.resTopicTrainList.size(); resource++){ if (Bookmark.getResourcesFromUser(this.trainList, userId).contains(resource)) continue; double resourceActivation = this.calculateResourceActivations(userId, resource, beta, r); double activation = resourceActivation*(1-cfWeight); if (candidateSet.containsKey(resource)) activation+=candidateSet.get(resource)*cfWeight; resourceActivationMap.put(resource, activation); } } TreeMap<Integer, Double> sortedResourceActivationMap = new TreeMap<Integer, Double>(new DoubleMapComparator(resourceActivationMap)); sortedResourceActivationMap.putAll(resourceActivationMap); int[] sortedResources = new int[sampleSize]; int index =0; for (int resourceId : sortedResourceActivationMap.navigableKeySet()){ sortedResources[index] = resourceId; index ++; if (index == sampleSize) break; } return sortedResources; } private Map<Integer, Double> calculateNormalizedValues(Map<Integer, Double> values) { //normalize double sum =0; for (Map.Entry<Integer, Double> entry : values.entrySet()) { sum += entry.getValue(); } for (Map.Entry<Integer, Double> entry : values.entrySet()) { entry.setValue(1000/sum *entry.getValue()); } return values; } private double calculateResourceActivations(int userId, int resource, double beta, double r){ Set<Integer> topics = this.resTopicTrainList.get(resource).keySet(); // Vector, write 1 for every existing topic GVector currentResource = new GVector(this.numberOfTopics); currentResource.zero(); for (Integer t : topics) currentResource.setElement(t, 1); double maxActivation = 0.0; double totalActivation = 0.0; for (GVector c : this.userClusterList.get(userId)){ Pair<Double, GVector> activationPair = this.calculateActivation(currentResource, c, this.userLambdaList.get(userId), r); if (activationPair.getLeft()>maxActivation){ maxActivation = activationPair.getLeft(); } totalActivation+= activationPair.getLeft(); } maxActivation = Math.pow(maxActivation, beta)/Math.pow(totalActivation, beta)*maxActivation; return maxActivation; } private TreeMap<Integer, Double> calculateCandidateSet(int userId){ GVector lastCluster = (this.userClusterList.get(userId)).get(this.userClusterList.get(userId).size()-1); HashMap<Integer, Double> topicMap = new HashMap<Integer, Double>(); HashMap<Integer, Double> simMap = new HashMap<Integer, Double>(); for (int c=0; c<lastCluster.getSize(); c++){ if (lastCluster.getElement(c)>0) topicMap.put(c, lastCluster.getElement(c)); } for (int resource =0; resource< this.resTopicTrainList.size(); resource++){ if (Bookmark.getResourcesFromUser(this.trainList, userId).contains(resource)) continue; /// are the topics sorted Set<Integer> topics = this.resTopicTrainList.get(resource).keySet(); HashMap<Integer, Double> resourceMap = new HashMap<Integer, Double>(); for (Integer t : topics) resourceMap.put(t, 1.0); simMap.put(resource, Utilities.getCosineFloatSim(topicMap, resourceMap)); } TreeMap<Integer, Double> sortedSimMap = new TreeMap<Integer, Double>(new DoubleMapComparator(simMap)); sortedSimMap.putAll(simMap); return sortedSimMap; } }