/* * Copyright (C) 2015 RankSys http://ranksys.org * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ package org.ranksys.examples; import es.uam.eps.ir.ranksys.fast.index.FastItemIndex; import es.uam.eps.ir.ranksys.fast.index.FastUserIndex; import es.uam.eps.ir.ranksys.fast.index.SimpleFastItemIndex; import es.uam.eps.ir.ranksys.fast.index.SimpleFastUserIndex; import es.uam.eps.ir.ranksys.fast.preference.FastPreferenceData; import es.uam.eps.ir.ranksys.fast.preference.SimpleFastPreferenceData; import java.io.IOException; import org.ranksys.compression.codecs.CODEC; import org.ranksys.compression.codecs.dsi.FixedLengthBitStreamCODEC; import org.ranksys.compression.codecs.lemire.IntegratedFORVBCODEC; import org.ranksys.compression.preferences.RatingCODECPreferenceData; import org.ranksys.formats.index.ItemsReader; import org.ranksys.formats.index.UsersReader; import static org.ranksys.formats.parsing.Parsers.lp; import org.ranksys.formats.preference.SimpleRatingPreferencesReader; /** * Example of usage of the RankSys-compression module. * <br> * If you use this code, please cite the following papers: * <ul> * <li>Vargas, S., Macdonald, C., Ounis, I. (2015). Analysing Compression Techniques for In-Memory Collaborative Filtering. In Poster Proceedings of the 9th ACM Conference on Recommender Systems. <a href="http://ceur-ws.org/Vol-1441/recsys2015_poster2.pdf">http://ceur-ws.org/Vol-1441/recsys2015_poster2.pdf</a>.</li> * <li>Catena, M., Macdonald, C., Ounis, I. (2014). On Inverted Index Compression for Search Engine Efficiency. In ECIR (pp. 359–371). doi:10.1007/978-3-319-06028-6_30</li> * </ul> * The code that reproduces the results of the RecSys 2015 poster by Vargas et al. in a separated project: <a href="http://github.com/saulvargas/recsys2015">http://github.com/saulvargas/recsys2015</a> * <br> * The search index compression technologies of the ECIR paper by Catena et al. is part of the Terrier IR Platform: <a href="http://terrier.org/docs/v4.0/compression.html">http://terrier.org/docs/v4.0/compression.html</a>. * * @author Saúl Vargas (Saul.Vargas@glasgow.ac.uk) */ public class CompressionExample { public static void main(String[] args) throws IOException, ClassNotFoundException { String userPath = args[0]; String itemPath = args[1]; String dataPath = args[2]; // READING USER, ITEM AND RATINGS FILES FastUserIndex<Long> users = SimpleFastUserIndex.load(UsersReader.read(userPath, lp)); FastItemIndex<Long> items = SimpleFastItemIndex.load(ItemsReader.read(itemPath, lp)); FastPreferenceData<Long, Long> simpleData = SimpleFastPreferenceData.load(SimpleRatingPreferencesReader.get().read(dataPath, lp, lp), users, items); // CREATING A COMPRESSED PREFERENCE DATA CODEC<int[]> uCodec = new IntegratedFORVBCODEC(); CODEC<int[]> iCodec = new IntegratedFORVBCODEC(); // We assume here that the ratings are 1-5 stars CODEC<byte[]> vCodec = new FixedLengthBitStreamCODEC(3); FastPreferenceData<Long, Long> codecData = new RatingCODECPreferenceData<>(simpleData, users, items, uCodec, iCodec, vCodec); // PRINTING COMPRESSION STATISTICS System.out.println(uCodec.stats()[0] + "\t" + uCodec.stats()[1]); System.out.println(iCodec.stats()[0] + "\t" + iCodec.stats()[1]); System.out.println(vCodec.stats()[0] + "\t" + vCodec.stats()[1]); System.out.println(codecData.numPreferences()); } }