package edu.umn.cs.recsys.svd;
import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.SingularValueDecomposition;
import org.grouplens.lenskit.ItemScorer;
import org.grouplens.lenskit.baseline.BaselineScorer;
import org.grouplens.lenskit.core.Transient;
import org.grouplens.lenskit.cursors.Cursor;
import org.grouplens.lenskit.data.dao.ItemDAO;
import org.grouplens.lenskit.data.dao.UserDAO;
import org.grouplens.lenskit.data.dao.UserEventDAO;
import org.grouplens.lenskit.data.event.Event;
import org.grouplens.lenskit.data.event.Rating;
import org.grouplens.lenskit.data.event.Ratings;
import org.grouplens.lenskit.data.history.UserHistory;
import org.grouplens.lenskit.indexes.IdIndexMapping;
import org.grouplens.lenskit.vectors.MutableSparseVector;
import org.grouplens.lenskit.vectors.VectorEntry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import javax.inject.Provider;
/**
 * Model builder that computes the SVD model.
 *
 * <p>The builder assembles a user-by-item matrix of baseline-normalized ratings,
 * factorizes it with a singular value decomposition, and keeps only the leading
 * latent features of each factor matrix.
 */
public class SVDModelBuilder implements Provider<SVDModel> {
    private static final Logger logger = LoggerFactory.getLogger(SVDModelBuilder.class);

    private final UserEventDAO userEventDAO;
    private final UserDAO userDAO;
    private final ItemDAO itemDAO;
    private final ItemScorer baselineScorer;
    private final int featureCount;

    /**
     * Construct the model builder.
     *
     * @param uedao The user event DAO.
     * @param udao The user DAO.
     * @param idao The item DAO.
     * @param baseline The baseline scorer (this will be used to compute means).
     * @param nfeatures The number of latent features to train.
     */
    @Inject
    public SVDModelBuilder(@Transient UserEventDAO uedao,
                           @Transient UserDAO udao,
                           @Transient ItemDAO idao,
                           @Transient @BaselineScorer ItemScorer baseline,
                           @LatentFeatureCount int nfeatures) {
        logger.debug("user DAO: {}", udao);
        userEventDAO = uedao;
        userDAO = udao;
        itemDAO = idao;
        baselineScorer = baseline;
        featureCount = nfeatures;
    }

    /**
     * Build the SVD model.
     *
     * <p>If the configured feature count exceeds the number of singular values
     * produced by the decomposition, it is reduced to that number (with a logged
     * warning) instead of failing while truncating the factor matrices.
     *
     * @return A singular value decomposition recommender model.
     * @throws IllegalArgumentException if the configured feature count is not positive.
     */
    @Override
    public SVDModel get() {
        if (featureCount <= 0) {
            throw new IllegalArgumentException(
                    "latent feature count must be positive, got " + featureCount);
        }

        // Create index mappings of user and item IDs.
        // These are used to find rows and columns in the matrix based on user/item IDs.
        IdIndexMapping userMapping = IdIndexMapping.create(userDAO.getUserIds());
        logger.debug("indexed {} users", userMapping.size());
        IdIndexMapping itemMapping = IdIndexMapping.create(itemDAO.getItemIds());
        logger.debug("indexed {} items", itemMapping.size());

        // We have to do 3 things:
        // First, prepare a matrix containing the rating data.
        RealMatrix matrix = createRatingMatrix(userMapping, itemMapping);

        // Second, compute its factorization.
        // All the work is done in the constructor.
        SingularValueDecomposition svd = new SingularValueDecomposition(matrix);

        // Third, truncate the decomposed matrices. The factor matrices have only as
        // many columns as there are singular values, so requesting more features than
        // that would make getSubMatrix fail; clamp instead.
        final int available = svd.getSingularValues().length;
        final int nfeatures = Math.min(featureCount, available);
        if (nfeatures < featureCount) {
            logger.warn("requested {} features but only {} singular values are available; using {}",
                        featureCount, available, nfeatures);
        }

        final int lastUserRow = userMapping.size() - 1;
        final int lastItemRow = itemMapping.size() - 1;
        final int lastFeature = nfeatures - 1;
        return new SVDModel(userMapping, itemMapping,
                            svd.getU().getSubMatrix(0, lastUserRow, 0, lastFeature),
                            svd.getV().getSubMatrix(0, lastItemRow, 0, lastFeature),
                            svd.getS().getSubMatrix(0, lastFeature, 0, lastFeature));
    }

    /**
     * Build a rating matrix from the rating data. Each user's ratings are first normalized
     * by subtracting a baseline score (usually a mean).
     *
     * @param userMapping The index mapping of user IDs to row numbers.
     * @param itemMapping The index mapping of item IDs to column numbers.
     * @return A matrix storing the <i>normalized</i> user ratings, with users on rows
     *         and items on columns.
     */
    private RealMatrix createRatingMatrix(IdIndexMapping userMapping, IdIndexMapping itemMapping) {
        final int nusers = userMapping.size();
        final int nitems = itemMapping.size();

        // Create a matrix with users on rows and items on columns
        logger.info("creating {} by {} rating matrix", nusers, nitems);
        RealMatrix matrix = MatrixUtils.createRealMatrix(nusers, nitems);

        // populate it with data
        Cursor<UserHistory<Event>> users = userEventDAO.streamEventsByUser();
        try {
            for (UserHistory<Event> user : users) {
                final long userId = user.getUserId();
                // Get the row number for this user
                final int row = userMapping.getIndex(userId);

                MutableSparseVector ratings = Ratings.userRatingVector(user.filter(Rating.class));

                // Score all of this user's rated items in one batch call rather than
                // invoking the baseline scorer once per rating.
                MutableSparseVector baselines = MutableSparseVector.create(ratings.keySet());
                baselineScorer.score(userId, baselines);

                for (VectorEntry rating : ratings.fast()) {
                    final long itemId = rating.getKey();
                    // NOTE(review): NaN default is intended to mirror what the single-item
                    // score(user, item) call yields for an unscorable item - confirm
                    // against the LensKit version in use.
                    final double baseline = baselines.get(itemId, Double.NaN);
                    matrix.setEntry(row, itemMapping.getIndex(itemId), rating.getValue() - baseline);
                }
            }
        } finally {
            users.close();
        }
        return matrix;
    }
}