/* * Copyright (C) 2015 Information Retrieval Group at Universidad Autónoma * de Madrid, http://ir.ii.uam.es * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ package es.uam.eps.ir.ranksys.examples; import cc.mallet.topics.ParallelTopicModel; import es.uam.eps.ir.ranksys.fast.index.FastItemIndex; import es.uam.eps.ir.ranksys.fast.index.FastUserIndex; import es.uam.eps.ir.ranksys.fast.index.SimpleFastItemIndex; import es.uam.eps.ir.ranksys.fast.index.SimpleFastUserIndex; import es.uam.eps.ir.ranksys.fast.preference.FastPreferenceData; import es.uam.eps.ir.ranksys.fast.preference.SimpleFastPreferenceData; import es.uam.eps.ir.ranksys.mf.Factorization; import es.uam.eps.ir.ranksys.mf.als.HKVFactorizer; import es.uam.eps.ir.ranksys.mf.als.PZTFactorizer; import es.uam.eps.ir.ranksys.mf.plsa.PLSAFactorizer; import es.uam.eps.ir.ranksys.mf.rec.MFRecommender; import es.uam.eps.ir.ranksys.nn.item.ItemNeighborhoodRecommender; import es.uam.eps.ir.ranksys.nn.item.neighborhood.ItemNeighborhood; import es.uam.eps.ir.ranksys.nn.item.neighborhood.ItemNeighborhoods; import es.uam.eps.ir.ranksys.nn.item.sim.ItemSimilarities; import es.uam.eps.ir.ranksys.nn.item.sim.ItemSimilarity; import es.uam.eps.ir.ranksys.nn.user.UserNeighborhoodRecommender; import es.uam.eps.ir.ranksys.nn.user.neighborhood.UserNeighborhood; import es.uam.eps.ir.ranksys.nn.user.neighborhood.UserNeighborhoods; import es.uam.eps.ir.ranksys.nn.user.sim.UserSimilarities; import es.uam.eps.ir.ranksys.nn.user.sim.UserSimilarity; import es.uam.eps.ir.ranksys.rec.Recommender; import es.uam.eps.ir.ranksys.rec.fast.basic.PopularityRecommender; import es.uam.eps.ir.ranksys.rec.fast.basic.RandomRecommender; import es.uam.eps.ir.ranksys.rec.runner.RecommenderRunner; import es.uam.eps.ir.ranksys.rec.runner.fast.FastFilterRecommenderRunner; import es.uam.eps.ir.ranksys.rec.runner.fast.FastFilters; import org.jooq.lambda.Unchecked; import org.ranksys.fm.PreferenceFM; import org.ranksys.fm.learner.BPRLearner; import org.ranksys.fm.learner.RMSELearner; import org.ranksys.fm.rec.FMRecommender; import org.ranksys.formats.index.ItemsReader; import org.ranksys.formats.index.UsersReader; import org.ranksys.formats.preference.SimpleRatingPreferencesReader; import org.ranksys.formats.rec.RecommendationFormat; import org.ranksys.formats.rec.SimpleRecommendationFormat; import org.ranksys.lda.LDAModelEstimator; import org.ranksys.lda.LDARecommender; import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Set; import java.util.function.DoubleUnaryOperator; import java.util.function.Function; import java.util.function.IntPredicate; import java.util.function.Supplier; import java.util.stream.Collectors; import static org.ranksys.formats.parsing.Parsers.lp; /** * Example main of recommendations. * * @author Saúl Vargas (saul.vargas@uam.es) * @author Pablo Castells (pablo.castells@uam.es) */ public class RecommenderExample { public static void main(String[] args) throws IOException { String userPath = args[0]; String itemPath = args[1]; String trainDataPath = args[2]; String testDataPath = args[3]; FastUserIndex<Long> userIndex = SimpleFastUserIndex.load(UsersReader.read(userPath, lp)); FastItemIndex<Long> itemIndex = SimpleFastItemIndex.load(ItemsReader.read(itemPath, lp)); FastPreferenceData<Long, Long> trainData = SimpleFastPreferenceData.load(SimpleRatingPreferencesReader.get().read(trainDataPath, lp, lp), userIndex, itemIndex); FastPreferenceData<Long, Long> testData = SimpleFastPreferenceData.load(SimpleRatingPreferencesReader.get().read(testDataPath, lp, lp), userIndex, itemIndex); ////////////////// // RECOMMENDERS // ////////////////// Map<String, Supplier<Recommender<Long, Long>>> recMap = new HashMap<>(); // random recommendation recMap.put("rnd", () -> new RandomRecommender<>(trainData, trainData)); // most-popular recommendation recMap.put("pop", () -> new PopularityRecommender<>(trainData)); // user-based nearest neighbors recMap.put("ub", () -> { int k = 100; int q = 1; UserSimilarity<Long> sim = UserSimilarities.vectorCosine(trainData, true); UserNeighborhood<Long> neighborhood = UserNeighborhoods.topK(sim, k); return new UserNeighborhoodRecommender<>(trainData, neighborhood, q); }); // item-based nearest neighbors recMap.put("ib", () -> { int k = 10; int q = 1; ItemSimilarity<Long> sim = ItemSimilarities.vectorCosine(trainData, true); ItemNeighborhood<Long> neighborhood = ItemNeighborhoods.cached(ItemNeighborhoods.topK(sim, k)); return new ItemNeighborhoodRecommender<>(trainData, neighborhood, q); }); // implicit matrix factorization of Hu et al. 2008 recMap.put("hkv", () -> { int k = 50; double lambda = 0.1; double alpha = 1.0; DoubleUnaryOperator confidence = x -> 1 + alpha * x; int numIter = 20; Factorization<Long, Long> factorization = new HKVFactorizer<Long, Long>(lambda, confidence, numIter).factorize(k, trainData); return new MFRecommender<>(userIndex, itemIndex, factorization); }); // implicit matrix factorization of Pilaszy et al. 2010 recMap.put("pzt", () -> { int k = 50; double lambda = 0.1; double alpha = 1.0; DoubleUnaryOperator confidence = x -> 1 + alpha * x; int numIter = 20; Factorization<Long, Long> factorization = new PZTFactorizer<Long, Long>(lambda, confidence, numIter).factorize(k, trainData); return new MFRecommender<>(userIndex, itemIndex, factorization); }); // probabilistic latent semantic analysis of Hofmann 2004 recMap.put("plsa", () -> { int k = 50; int numIter = 100; Factorization<Long, Long> factorization = new PLSAFactorizer<Long, Long>(numIter).factorize(k, trainData); return new MFRecommender<>(userIndex, itemIndex, factorization); }); // LDA topic modelling by Blei et al. 2003 recMap.put("lda", Unchecked.supplier(() -> { int k = 50; double alpha = 1.0; double beta = 0.01; int numIter = 200; int burninPeriod = 50; ParallelTopicModel topicModel = LDAModelEstimator.estimate(trainData, k, alpha, beta, numIter, burninPeriod); return new LDARecommender<>(userIndex, itemIndex, topicModel); })); // Factorisation machine using a BRP-like loss recMap.put("fm-bpr", Unchecked.supplier(() -> { double learnRate = 0.01; int numIter = 200; double regW = 0.01; double regM = 0.01; int K = 100; double sdev = 0.1; PreferenceFM<Long, Long> prefFm = new BPRLearner<>(learnRate, numIter, regW, regM, userIndex, itemIndex).learn(trainData, testData, K, sdev); return new FMRecommender<>(prefFm); })); // Factorisation machine usinga RMSE-like loss with balanced sampling of negative // instances recMap.put("fm-rmse", Unchecked.supplier(() -> { double learnRate = 0.01; int numIter = 50; double regB = 0.01; double regW = 0.01; double regM = 0.01; double negativeProp = 2.0; int K = 100; double sdev = 0.1; PreferenceFM<Long, Long> prefFm = new RMSELearner<>(learnRate, numIter, regB, regW, regM, negativeProp, userIndex, itemIndex).learn(trainData, testData, K, sdev); return new FMRecommender<>(prefFm); })); //////////////////////////////// // GENERATING RECOMMENDATIONS // //////////////////////////////// Set<Long> targetUsers = testData.getUsersWithPreferences().collect(Collectors.toSet()); RecommendationFormat<Long, Long> format = new SimpleRecommendationFormat<>(lp, lp); Function<Long, IntPredicate> filter = FastFilters.notInTrain(trainData); int maxLength = 100; RecommenderRunner<Long, Long> runner = new FastFilterRecommenderRunner<>(userIndex, itemIndex, targetUsers.stream(), filter, maxLength); recMap.forEach(Unchecked.biConsumer((name, recommender) -> { System.out.println("Running " + name); try (RecommendationFormat.Writer<Long, Long> writer = format.getWriter(name)) { runner.run(recommender.get(), writer); } })); } }