/*
TagRecommender:
A framework to implement and evaluate algorithms for the recommendation
of tags.
Copyright (C) 2013 Dominik Kowald
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package common;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import net.sf.javaml.clustering.mcl.SparseMatrix;
import net.sf.javaml.clustering.mcl.SparseVector;
/**
 * Sparse tag-by-tag co-occurrence matrix built from a list of bookmarks.
 *
 * <p>Entry {@code (x, y)} holds how often tag {@code x} and tag {@code y} were
 * assigned to the same bookmark. Optionally the raw counts are normalized with
 * the per-tag counts handed to the constructor.
 */
public class CooccurenceMatrix {

    /** Maximum number of destination tags kept when only the top tags are requested. */
    private static final int TOP_TAG_LIMIT = 10;

    private final SparseMatrix coocurenceMatrix;

    /** Per-tag counts, looked up by matrix row position and by column key. */
    private final List<Integer> tagCounts;

    /**
     * Builds the matrix from the given bookmarks.
     *
     * @param bookmarks bookmarks whose tag lists are counted
     * @param tagCounts per-tag counts used for normalization; the reference is
     *                  stored as-is, not copied
     * @param normalize if {@code true}, the raw counts are normalized right
     *                  after the matrix has been filled
     */
    public CooccurenceMatrix(List<Bookmark> bookmarks, List<Integer> tagCounts, boolean normalize) {
        this.coocurenceMatrix = new SparseMatrix();
        this.tagCounts = tagCounts;
        this.initMatrix(bookmarks);
        if (normalize) {
            normalizeMatrix();
        }
    }

    /**
     * Adds 1.0 to the matrix for every ordered pair of tags of every bookmark.
     * A tag is also paired with itself, so the diagonal is filled as well.
     */
    private void initMatrix(List<Bookmark> bookmarks) {
        for (Bookmark bookmark : bookmarks) {
            List<Integer> tags = bookmark.getTags();
            for (int rowTag : tags) {
                for (int columnTag : tags) {
                    this.coocurenceMatrix.add(rowTag, columnTag, 1.0);
                }
            }
        }
    }

    /**
     * Replaces every entry {@code c(x, y)} by
     * {@code c / (count(x) + count(y) - c)}, which has the form of a Jaccard
     * coefficient if {@code tagCounts} holds the number of occurrences per tag.
     *
     * <p>NOTE(review): the row index is derived from the iteration order of the
     * matrix, i.e. this assumes the matrix iterates its rows in ascending tag
     * id order starting at 0 - confirm against SparseMatrix.
     */
    private void normalizeMatrix() {
        int x = 0;
        for (SparseVector row : this.coocurenceMatrix) {
            int xCount = this.tagCounts.get(x);
            for (Map.Entry<Integer, Double> entry : row.entrySet()) {
                int yCount = this.tagCounts.get(entry.getKey());
                double coocurenceCount = entry.getValue().doubleValue();
                entry.setValue(coocurenceCount / (xCount + yCount - coocurenceCount));
            }
            x++;
        }
    }

    /**
     * Alternative weighting: replaces every entry {@code c(x, y)} by
     * {@code c * (count(x) + count(y)) / (count(x) * count(y))}.
     *
     * <p>Currently not invoked from the constructor. The sum and the product of
     * the two counts are computed in {@code double} so that large tag counts
     * cannot overflow the {@code int} range.
     */
    private void calculateRelatedness() {
        int sourceTag = 0;
        for (SparseVector row : this.coocurenceMatrix) {
            int frequencySourceTag = this.tagCounts.get(sourceTag);
            for (Map.Entry<Integer, Double> entry : row.entrySet()) {
                int frequencyDestinationTag = this.tagCounts.get(entry.getKey());
                double coocurence = entry.getValue().doubleValue();
                double frequencySum = (double) frequencySourceTag + frequencyDestinationTag;
                double frequencyProduct = (double) frequencySourceTag * frequencyDestinationTag;
                entry.setValue((coocurence * frequencySum) / frequencyProduct);
            }
            sourceTag++;
        }
    }

    /**
     * Sums the positive matrix values between {@code tag} and each of the
     * destination tags.
     *
     * <p>The sum is accumulated in {@code double} and truncated once on return,
     * so fractional (normalized) values are not dropped one by one.
     *
     * @param tag             row of the matrix to read
     * @param destinationTags columns to sum up; a tag listed twice is counted twice
     * @return the truncated sum of all positive values found
     */
    public int getCoocurenceCount(int tag, List<Integer> destinationTags) {
        double total = 0.0;
        SparseVector vec = this.coocurenceMatrix.get(tag);
        for (int destTag : destinationTags) {
            Double coVal = vec.get(destTag);
            if (coVal != null && coVal.doubleValue() > 0.0) {
                total += coVal.doubleValue();
            }
        }
        return (int) total;
    }

    /**
     * Spreads the weights of the given source tags over all tags they co-occur with.
     *
     * @param tags source tags used for the activation, mapped to their weight
     * @return for every tag reached with a positive weighted value, the sum of
     *         {@code weight(source) * matrixValue(source, tag)} over all source
     *         tags, in order of first encounter
     */
    public Map<Integer, Double> getCooccurenceTags(Map<Integer, Integer> tags) {
        Map<Integer, Double> resultTags = new LinkedHashMap<Integer, Double>();
        for (Map.Entry<Integer, Integer> sourceTag : tags.entrySet()) {
            SparseVector vec = this.coocurenceMatrix.get(sourceTag.getKey());
            for (Map.Entry<Integer, Double> coocurenceEntry : vec.entrySet()) {
                double weightedValue = sourceTag.getValue() * coocurenceEntry.getValue();
                if (weightedValue > 0.0) {
                    Double tagVal = resultTags.get(coocurenceEntry.getKey());
                    resultTags.put(coocurenceEntry.getKey(),
                            tagVal == null ? weightedValue : tagVal.doubleValue() + weightedValue);
                }
            }
        }
        return resultTags;
    }

    /**
     * Computes, for every source tag, its associative component with respect
     * to the destination tags.
     *
     * @param sourceTags      tags to score, mapped to their weight; {@code null}
     *                        yields an empty result
     * @param destinationTags tags the source tags are related to, mapped to
     *                        their weight; {@code null} is treated as empty
     * @param srcCount        if {@code true}, each component is multiplied by
     *                        the weight of its source tag
     * @param destCount       if {@code true}, each matrix value is multiplied by
     *                        the weight of the destination tag; otherwise the
     *                        values are averaged over the associated tags
     * @param onlyTopTags     if {@code true}, only the first
     *                        {@value #TOP_TAG_LIMIT} destination tags in
     *                        {@code DoubleMapComparator} order are considered
     * @return source tag id mapped to its associative component, in the
     *         iteration order of {@code sourceTags}
     */
    public Map<Integer, Double> calculateAssociativeComponentsWithTagAssosiation(Map<Integer, Double> sourceTags, Map<Integer, Double> destinationTags, boolean srcCount, boolean destCount, boolean onlyTopTags) {
        Map<Integer, Double> associativeComponents = new LinkedHashMap<Integer, Double>();
        Map<Integer, Double> destinationTagsCopy = selectDestinationTags(destinationTags, onlyTopTags);
        if (sourceTags != null) {
            for (Map.Entry<Integer, Double> tag : sourceTags.entrySet()) {
                double sourceWeight = srcCount ? tag.getValue() : 1.0;
                associativeComponents.put(tag.getKey(),
                        sourceWeight * this.calculateAssociativeComponent(tag.getKey(), destinationTagsCopy, destCount));
            }
        }
        return associativeComponents;
    }

    /**
     * Returns a copy of the destination tags, optionally cut down to the first
     * {@link #TOP_TAG_LIMIT} entries. A {@code null} input gives an empty map.
     */
    private Map<Integer, Double> selectDestinationTags(Map<Integer, Double> destinationTags, boolean onlyTopTags) {
        Map<Integer, Double> selected = new LinkedHashMap<Integer, Double>();
        if (destinationTags == null) {
            return selected;
        }
        if (!onlyTopTags) {
            selected.putAll(destinationTags);
            return selected;
        }
        // NOTE(review): presumably DoubleMapComparator orders keys by descending
        // value; if it reports equal values as equal keys, the TreeMap keeps
        // only one of them - confirm in DoubleMapComparator.
        Map<Integer, Double> sortedDestinationTags = new TreeMap<Integer, Double>(new DoubleMapComparator(destinationTags));
        sortedDestinationTags.putAll(destinationTags);
        for (Map.Entry<Integer, Double> entry : sortedDestinationTags.entrySet()) {
            if (selected.size() >= TOP_TAG_LIMIT) {
                break;
            }
            selected.put(entry.getKey(), entry.getValue());
        }
        return selected;
    }

    /**
     * Sums the positive matrix values between {@code tag} and the destination tags.
     *
     * @param tag             row of the matrix to read
     * @param destinationTags destination tags mapped to their weight; {@code null} gives 0.0
     * @param destCount       if {@code true}, every value is multiplied by the
     *                        destination weight and the plain sum is returned;
     *                        otherwise the sum is divided by the number of
     *                        destination tags that contributed
     * @return the associative component of {@code tag}
     */
    private Double calculateAssociativeComponent(int tag, Map<Integer, Double> destinationTags, boolean destCount) {
        if (destinationTags == null) {
            return 0.0;
        }
        SparseVector vec = this.coocurenceMatrix.get(tag);
        double associativeValue = 0.0;
        int numbAssociatedNodes = 0;
        for (Map.Entry<Integer, Double> destinationTag : destinationTags.entrySet()) {
            Double relatedness = vec.get(destinationTag.getKey());
            if (relatedness != null && relatedness > 0.0) {
                numbAssociatedNodes++;
                associativeValue += (relatedness * (destCount ? destinationTag.getValue() : 1.0));
            }
        }
        // Without destination weights the result is the mean over the contributing tags.
        if (!destCount && numbAssociatedNodes > 0) {
            return associativeValue / numbAssociatedNodes;
        }
        return associativeValue;
    }
}