package edu.umn.cs.recsys; import com.google.common.collect.ImmutableList; import edu.umn.cs.recsys.dao.ItemTagDAO; import org.grouplens.lenskit.core.LenskitRecommender; import org.grouplens.lenskit.eval.algorithm.AlgorithmInstance; import org.grouplens.lenskit.eval.data.traintest.TTDataSet; import org.grouplens.lenskit.eval.metrics.AbstractTestUserMetric; import org.grouplens.lenskit.eval.metrics.TestUserMetricAccumulator; import org.grouplens.lenskit.eval.metrics.topn.ItemSelectors; import org.grouplens.lenskit.eval.traintest.TestUser; import org.grouplens.lenskit.scored.ScoredId; import org.grouplens.lenskit.vectors.MutableSparseVector; import org.grouplens.lenskit.vectors.VectorEntry; import org.grouplens.lenskit.vectors.VectorEntry.State; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nonnull; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; /** * A metric that measures the tag entropy of the recommended items. * @author <a href="http://www.grouplens.org">GroupLens Research</a> */ public class TagEntropyMetric extends AbstractTestUserMetric { private final int listSize; private final List<String> columns; private static final Logger logger = LoggerFactory.getLogger(TagEntropyMetric.class); /** * Construct a new tag entropy metric. * * @param nitems The number of items to request. */ public TagEntropyMetric(int nitems) { listSize = nitems; // initialize column labels with list length columns = ImmutableList.of(String.format("TagEntropy@%d", nitems)); } /** * Make a metric accumulator. Metrics operate with <em>accumulators</em>, which are created * for each algorithm and data set. The accumulator measures each user's output, and * accumulates the results into a global statistic for the whole evaluation. * * @param algorithm The algorithm being tested. * @param data The data set being tested with. * @return An accumulator for analyzing this algorithm and data set. */ @Override public TestUserMetricAccumulator makeAccumulator(AlgorithmInstance algorithm, TTDataSet data) { return new TagEntropyAccumulator(); } /** * Return the labels for the (global) columns returned by this metric. * @return The labels for the global columns. */ @Override public List<String> getColumnLabels() { return columns; } /** * Return the lables for the per-user columns returned by this metric. */ @Override public List<String> getUserColumnLabels() { // per-user and global have the same fields, they just differ in aggregation. return columns; } private class TagEntropyAccumulator implements TestUserMetricAccumulator { private double totalEntropy = 0; private int userCount = 0; private Set<Long> movieTagIds(TagVocabulary vocab, ItemTagDAO tagDAO, long movie) { Set<Long> tagIds = new HashSet<Long>(); List<String> movieTags = tagDAO.getItemTags(movie); for (String tag : movieTags) { long tagid = vocab.getTagId(tag); tagIds.add(tagid); } return tagIds; } private Map<Long, Set<Long>> movieTagIdMap(TagVocabulary vocab, ItemTagDAO tagDAO, List<ScoredId> recommendations) { Map<Long, Set<Long>> moviemap = new HashMap<Long, Set<Long>>(); for (ScoredId scoredId : recommendations) { long movie = scoredId.getId(); Set<Long> tagIds = movieTagIds(vocab, tagDAO, movie); moviemap.put(movie, tagIds); } return moviemap; } /** * A vector of tagids just within the movies in the set. * @param recommendations * @return */ private MutableSparseVector moviesetVocab(TagVocabulary vocab, ItemTagDAO tagDAO, Map<Long, Set<Long>> moviemap) { Set<Long> subsetVocab = new HashSet<Long>(); for (long movie : moviemap.keySet()) { Set<Long> tagids = moviemap.get(movie); for (long tagid : tagids) { subsetVocab.add(tagid); } } return MutableSparseVector.create(subsetVocab); } private MutableSparseVector tagProbabilities(TagVocabulary vocab, ItemTagDAO tagDAO, List<ScoredId> recommendations) { Map<Long, Set<Long>> moviemap = movieTagIdMap(vocab, tagDAO, recommendations); double movieCount = moviemap.size(); assert (movieCount == recommendations.size()); MutableSparseVector tagProbabilities = moviesetVocab(vocab, tagDAO, moviemap); logger.info("Tag vocabulary size: {}", tagProbabilities.keyDomain().size()); for (VectorEntry entry : tagProbabilities.fast(State.EITHER)) { long tagid = entry.getKey(); double probability = 0.0; for (long movie : moviemap.keySet()) { Set<Long> movietags = moviemap.get(movie); double numTags = movietags.size(); if (movietags.contains(tagid)) { probability += (1.0 / movieCount) * (1.0 / numTags); } } tagProbabilities.set(entry, probability); } return tagProbabilities; } private double entropy(TagVocabulary vocab, ItemTagDAO tagDAO, List<ScoredId> recommendations) { MutableSparseVector tagProbs = tagProbabilities(vocab, tagDAO, recommendations); MutableSparseVector logProbs = MutableSparseVector.create(tagProbs.keyDomain()); for (VectorEntry entry : logProbs.fast(State.UNSET)) { long tagid = entry.getKey(); double probability = tagProbs.get(tagid); double logprob = Math.log(probability) / Math.log(2); logProbs.set(entry, logprob); } return -1.0 * logProbs.dot(tagProbs); } /** * Evaluate a single test user's recommendations or predictions. * @param testUser The user's recommendation result. * @return The values for the per-user columns. */ @Nonnull @Override public Object[] evaluate(TestUser testUser) { List<ScoredId> recommendations = testUser.getRecommendations(listSize, ItemSelectors.allItems(), ItemSelectors.trainingItems()); if (recommendations == null) { return new Object[1]; } LenskitRecommender lkrec = (LenskitRecommender) testUser.getRecommender(); ItemTagDAO tagDAO = lkrec.get(ItemTagDAO.class); TagVocabulary vocab = lkrec.get(TagVocabulary.class); logger.info("Got recommendations: {}", recommendations.size()); double entropy = entropy(vocab, tagDAO, recommendations); logger.info("Entropy: {}", entropy); totalEntropy += entropy; userCount += 1; return new Object[]{entropy}; } /** * Get the final aggregate results. This is called after all users have been evaluated, and * returns the values for the columns in the global output. * * @return The final, aggregated columns. */ @Nonnull @Override public Object[] finalResults() { // return a single field, the average entropy return new Object[]{totalEntropy / userCount}; } } }