package edu.umn.cs.recsys.ii;
import it.unimi.dsi.fastutil.longs.LongSet;
import it.unimi.dsi.fastutil.longs.LongSortedSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.inject.Inject;
import javax.inject.Provider;
import org.grouplens.lenskit.collections.LongUtils;
import org.grouplens.lenskit.core.Transient;
import org.grouplens.lenskit.cursors.Cursor;
import org.grouplens.lenskit.data.dao.ItemDAO;
import org.grouplens.lenskit.data.dao.UserEventDAO;
import org.grouplens.lenskit.data.event.Event;
import org.grouplens.lenskit.data.history.RatingVectorUserHistorySummarizer;
import org.grouplens.lenskit.data.history.UserHistory;
import org.grouplens.lenskit.scored.ScoredId;
import org.grouplens.lenskit.util.TopNScoredItemAccumulator;
import org.grouplens.lenskit.vectors.ImmutableSparseVector;
import org.grouplens.lenskit.vectors.MutableSparseVector;
import org.grouplens.lenskit.vectors.VectorEntry;
import org.grouplens.lenskit.vectors.similarity.CosineVectorSimilarity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Builds a {@link SimpleItemItemModel} from rating data.
 *
 * <p>Each user's rating vector is shifted by the negated user mean, the shifted ratings are
 * regrouped by item, and the cosine similarity is computed between every ordered pair of distinct
 * items. Only neighbors with strictly positive similarity are stored in the model.
 *
 * @author <a href="http://www.grouplens.org">GroupLens Research</a>
 */
public class SimpleItemItemModelBuilder implements Provider<SimpleItemItemModel> {
    private static final Logger logger = LoggerFactory.getLogger(SimpleItemItemModelBuilder.class);

    private final ItemDAO itemDao;
    private final UserEventDAO userEventDao;

    /**
     * Create a model builder.
     *
     * @param idao  the DAO used to enumerate item IDs
     * @param uedao the DAO used to stream each user's events
     */
    @Inject
    public SimpleItemItemModelBuilder(@Transient ItemDAO idao,
                                      @Transient UserEventDAO uedao) {
        itemDao = idao;
        userEventDao = uedao;
    }

    /**
     * Build the item-item model.
     *
     * @return a model mapping each item ID to the list of its neighbors (scored by cosine
     *         similarity) as produced by the accumulator's {@code finish()}
     */
    @Override
    public SimpleItemItemModel get() {
        // Transposed rating matrix: item ID -> that item's (mean-centered) rating vector
        Map<Long, ImmutableSparseVector> itemVectors = getItemVectors();
        LongSortedSet items = LongUtils.packedSet(itemVectors.keySet());

        CosineVectorSimilarity cosine = new CosineVectorSimilarity();
        // The model stores, for each item, a list of scored neighbor IDs
        Map<Long, List<ScoredId>> neighborhoods = new HashMap<Long, List<ScoredId>>();
        for (long item : items) {
            neighborhoods.put(item, findNeighbors(item, items, itemVectors, cosine));
        }
        return new SimpleItemItemModel(neighborhoods);
    }

    /**
     * Score every other item against {@code item} and collect those with positive similarity.
     *
     * @param item        the item whose neighborhood is being built
     * @param items       all item IDs (keys of {@code itemVectors})
     * @param itemVectors map from item ID to that item's rating vector
     * @param cosine      the similarity function to apply to pairs of rating vectors
     * @return the accumulated neighbors of {@code item}
     */
    private List<ScoredId> findNeighbors(long item,
                                         LongSortedSet items,
                                         Map<Long, ImmutableSparseVector> itemVectors,
                                         CosineVectorSimilarity cosine) {
        ImmutableSparseVector itemRatings = itemVectors.get(item);
        // One slot per possible neighbor (every item except this one)
        TopNScoredItemAccumulator accumulator = new TopNScoredItemAccumulator(items.size() - 1);
        for (long neighbor : items) {
            if (neighbor == item) {
                // an item is not its own neighbor
                continue;
            }
            double similarity = cosine.similarity(itemRatings, itemVectors.get(neighbor));
            // Keep strictly positive similarities only: a zero-similarity neighbor carries no
            // weight, and the comparison is also false for NaN.
            if (similarity > 0.0) {
                accumulator.put(neighbor, similarity);
            }
        }
        return accumulator.finish();
    }

    /**
     * Load the data into memory, indexed by item.
     *
     * <p>Every item reported by the item DAO gets a vector, even if nobody rated it. An item that
     * appears in a user's ratings but was not reported by the item DAO is still included (and a
     * warning is logged) rather than failing the whole build.
     *
     * @return A map from item IDs to item rating vectors. Each vector contains users' ratings for
     *         the item (offset by the negated mean of that user's ratings), keyed by user ID.
     */
    public Map<Long, ImmutableSparseVector> getItemVectors() {
        // item ID -> (user ID -> shifted rating); seeded with every known item
        LongSet items = itemDao.getItemIds();
        Map<Long, Map<Long, Double>> itemData = new HashMap<Long, Map<Long, Double>>();
        for (long item : items) {
            itemData.put(item, new HashMap<Long, Double>());
        }

        // Stream over all users' histories; the cursor must be closed even on failure
        Cursor<UserHistory<Event>> stream = userEventDao.streamEventsByUser();
        try {
            for (UserHistory<Event> history : stream) {
                long userId = history.getUserId();
                MutableSparseVector vector =
                        RatingVectorUserHistorySummarizer.makeRatingVector(history).mutableCopy();
                // Normalize: add the negated mean of this user's ratings to the vector
                vector.add(-vector.mean());

                for (VectorEntry entry : vector.fast(VectorEntry.State.EITHER)) {
                    long itemId = entry.getKey();
                    Map<Long, Double> ratings = itemData.get(itemId);
                    if (ratings == null) {
                        // Rated item that the item DAO did not list: create its storage lazily
                        // instead of dereferencing null.
                        logger.warn("item {} has ratings but was not reported by the item DAO",
                                    itemId);
                        ratings = new HashMap<Long, Double>();
                        itemData.put(itemId, ratings);
                    }
                    ratings.put(userId, entry.getValue());
                }
            }
        } finally {
            stream.close();
        }

        // Convert the temporary per-item maps into immutable sparse vectors
        Map<Long, ImmutableSparseVector> itemVectors = new HashMap<Long, ImmutableSparseVector>();
        for (Map.Entry<Long, Map<Long, Double>> entry : itemData.entrySet()) {
            MutableSparseVector vec = MutableSparseVector.create(entry.getValue());
            itemVectors.put(entry.getKey(), vec.immutable());
        }
        return itemVectors;
    }
}