package edu.umn.cs.recsys.ii;
import com.google.common.collect.ImmutableMap;
import it.unimi.dsi.fastutil.longs.LongSet;
import it.unimi.dsi.fastutil.longs.LongSortedSet;
import org.grouplens.lenskit.collections.LongUtils;
import org.grouplens.lenskit.core.Transient;
import org.grouplens.lenskit.cursors.Cursor;
import org.grouplens.lenskit.data.dao.ItemDAO;
import org.grouplens.lenskit.data.dao.UserEventDAO;
import org.grouplens.lenskit.data.event.Event;
import org.grouplens.lenskit.data.history.RatingVectorUserHistorySummarizer;
import org.grouplens.lenskit.data.history.UserHistory;
import org.grouplens.lenskit.scored.ScoredId;
import org.grouplens.lenskit.scored.ScoredIdListBuilder;
import org.grouplens.lenskit.scored.ScoredIds;
import org.grouplens.lenskit.util.TopNScoredItemAccumulator;
import org.grouplens.lenskit.vectors.ImmutableSparseVector;
import org.grouplens.lenskit.vectors.MutableSparseVector;
import org.grouplens.lenskit.vectors.SparseVector;
import org.grouplens.lenskit.vectors.VectorEntry;
import org.grouplens.lenskit.vectors.similarity.CosineVectorSimilarity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import javax.inject.Provider;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
 * Builds a {@link SimpleItemItemModel} from the rating data exposed by the injected DAOs.
 *
 * <p>Each user's rating vector is mean-centered, the ratings are regrouped into one vector per
 * item (keyed by user ID), and every pair of distinct items is compared with cosine similarity.
 * For each item, the neighbors whose similarity is strictly positive are collected and stored in
 * the model.
 *
 * @author <a href="http://www.grouplens.org">GroupLens Research</a>
 */
public class SimpleItemItemModelBuilder implements Provider<SimpleItemItemModel> {
    private static final Logger logger = LoggerFactory.getLogger(SimpleItemItemModelBuilder.class);

    private final ItemDAO itemDao;
    private final UserEventDAO userEventDao;

    /**
     * Creates a builder that reads items and user events from the given DAOs.
     *
     * @param idao  source of the item IDs
     * @param uedao source of the per-user event histories
     */
    @Inject
    public SimpleItemItemModelBuilder(@Transient ItemDAO idao,
                                      @Transient UserEventDAO uedao) {
        itemDao = idao;
        userEventDao = uedao;
    }

    /**
     * Computes the item-item neighborhoods and wraps them in a model.
     *
     * @return a model holding, for every item returned by {@link #getItemVectors()}, the list of
     *         other items whose cosine similarity to it is strictly positive, in the order
     *         produced by {@link TopNScoredItemAccumulator#finish()}
     */
    @Override
    public SimpleItemItemModel get() {
        // Transposed rating matrix: item ID -> that item's (mean-centered) rating vector.
        Map<Long, ImmutableSparseVector> itemVectors = getItemVectors();
        LongSortedSet items = LongUtils.packedSet(itemVectors.keySet());
        logger.debug("building item-item model for {} items", items.size());

        // One similarity function is enough for the whole build; no need to allocate per pair.
        CosineVectorSimilarity cosine = new CosineVectorSimilarity();
        // An item can have at most every other item as a neighbor, so nothing is ever truncated.
        int maxNeighbors = items.size() - 1;

        Map<Long, List<ScoredId>> neighborhoods = new HashMap<Long, List<ScoredId>>();
        for (long itemId : items) {
            ImmutableSparseVector itemVector = itemVectors.get(itemId);
            TopNScoredItemAccumulator accumulator = new TopNScoredItemAccumulator(maxNeighbors);

            for (long neighborId : items) {
                if (neighborId == itemId) {
                    // An item is not its own neighbor.
                    continue;
                }
                double similarity = cosine.similarity(itemVector, itemVectors.get(neighborId));
                // Only positively-correlated items are kept as neighbors.
                if (similarity > 0) {
                    accumulator.put(neighborId, similarity);
                }
            }

            neighborhoods.put(itemId, accumulator.finish());
        }
        return new SimpleItemItemModel(neighborhoods);
    }

    /**
     * Load the data into memory, indexed by item.
     *
     * <p>Every user's rating vector is mean-centered (the user's mean rating is subtracted from
     * each of their ratings) before the values are stored. Items reported by the item DAO that
     * received no ratings map to an empty vector. An item that appears in a user's ratings but
     * was not reported by the item DAO is still included (and logged) rather than causing a
     * failure.
     *
     * @return A map from item IDs to item rating vectors. Each vector contains users'
     *         mean-centered ratings for the item, keyed by user ID.
     */
    public Map<Long, ImmutableSparseVector> getItemVectors() {
        // Temporary storage: item ID -> (user ID -> mean-centered rating).
        Map<Long, Map<Long, Double>> itemData = new HashMap<Long, Map<Long, Double>>();
        for (long item : itemDao.getItemIds()) {
            itemData.put(item, new HashMap<Long, Double>());
        }

        // Stream over all users' histories; the cursor must be closed even if processing fails.
        Cursor<UserHistory<Event>> stream = userEventDao.streamEventsByUser();
        try {
            for (UserHistory<Event> history : stream) {
                MutableSparseVector ratings =
                        RatingVectorUserHistorySummarizer.makeRatingVector(history).mutableCopy();
                // Mean-center the user's ratings.
                ratings.add(-ratings.mean());

                long userId = history.getUserId();
                for (VectorEntry e : ratings) {
                    long itemId = e.getKey();
                    Map<Long, Double> itemRatings = itemData.get(itemId);
                    if (itemRatings == null) {
                        // The item DAO did not report this item; keep its ratings anyway
                        // instead of failing with a NullPointerException.
                        logger.warn("item {} has ratings but is not in the item DAO", itemId);
                        itemRatings = new HashMap<Long, Double>();
                        itemData.put(itemId, itemRatings);
                    }
                    itemRatings.put(userId, e.getValue());
                }
            }
        } finally {
            stream.close();
        }

        // Convert the temporary per-item maps into immutable sparse vectors.
        Map<Long, ImmutableSparseVector> itemVectors = new HashMap<Long, ImmutableSparseVector>();
        for (Map.Entry<Long, Map<Long, Double>> entry : itemData.entrySet()) {
            MutableSparseVector vec = MutableSparseVector.create(entry.getValue());
            itemVectors.put(entry.getKey(), vec.immutable());
        }
        return itemVectors;
    }
}