Pipeline.java example

Explorer
TagRec-master
- src
/*
 TagRecommender:
 A framework to implement and evaluate algorithms for the recommendation
 of tags.
 Copyright (C) 2013 Dominik Kowald
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.
 
 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package test;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import cc.mallet.topics.ParallelTopicModel;
import cc.mallet.types.InstanceList;
import common.Bookmark;
import common.CalculationType;
import common.Features;
import common.Utilities;
import engine.Algorithm;
import engine.EngineInterface;
import engine.EntityRecommenderEngine;
import engine.EntityType;
import engine.TagRecommenderEvalEngine;
import file.BookmarkReader;
import file.BookmarkSplitter;
import file.postprocessing.CatDescFiltering;
import file.preprocessing.BibsonomyProcessor;
import file.preprocessing.CiteULikeProcessor;
import file.preprocessing.LastFMProcessor;
import file.preprocessing.MovielensProcessor;
import file.preprocessing.PintsProcessor;
import file.preprocessing.TensorProcessor;
import itemrecommendations.CFResourceCalculator;
import itemrecommendations.CIRTTCalculator;
import itemrecommendations.HuangCalculator;
import itemrecommendations.MPResourceCalculator;
import itemrecommendations.SustainCalculator;
import itemrecommendations.ZhengCalculator;
import processing.BLLCalculator;
import processing.CFTagRecommender;
import processing.ContentBasedCalculator;
import processing.FolkRankCalculator;
import processing.GIRPTMCalculator;
import processing.MPCalculator;
import processing.MPurCalculator;
import processing.MalletCalculator;
import processing.MetricsCalculator;
import processing.RecencyCalculator;
import processing.ThreeLTCalculator;
import processing.analyzing.UserTagDistribution;
import processing.hashtag.HashtagRecommendationEngine;
import processing.hashtag.analysis.ProcessFrequencyRecency;
import processing.hashtag.analysis.ProcessFrequencyRecencySocial;
import processing.hashtag.social.SocialStrengthCalculator;
import processing.hashtag.solr.CFSolrHashtagCalculator;
import processing.hashtag.solr.SolrHashtagCalculator;
import processing.hashtag.solr.Tweet;

public class Pipeline {

    // Sizes of the train and test partitions. They are set automatically in code;
    // the start* methods below call getTrainTestSize(...) before reading them
    // (presumably that helper assigns both values - TODO confirm, it is outside this view).
    private static int TRAIN_SIZE;
    private static int TEST_SIZE;
    // Bookmark-count limits that are passed on to the metrics calculation and
    // the tensor file export as postprocessing filters (null means no limit).
    private final static Integer MIN_USER_BOOKMARKS = null;
    private final static Integer MAX_USER_BOOKMARKS = null;
    private final static Integer MIN_RESOURCE_BOOKMARKS = null;
    private final static Integer MAX_RESOURCE_BOOKMARKS = null;
    // Set for the categorizer/describer split (true is describer, false is
    // categorizer - null for nothing). When non-null, a CatDescFiltering
    // instance is created and configured with this value.
    private final static Boolean DESCRIBER = null;
    // Placeholder for the topic postfix (e.g. main sets it to "lda_500" for the
    // "item_cbt" operation).
    private static String TOPIC_NAME = null;
    // Placeholder for the used dataset; together with SUBDIR it is only used to
    // build the default directory/path for the manual test calls in main.
    private final static String DATASET = "twitter";
    private final static String SUBDIR = "/researchers";

    /**
     * Command-line entry point of the pipeline.
     * <p>
     * Prints the license banner and then dispatches on the arguments:
     * <ul>
     * <li>{@code args[0]} - the operation to run (e.g. {@code "cf"}, {@code "bll_c"},
     * {@code "split_l1o"}, {@code "item_mp"}, {@code "hashtag_hybrid"}, ...)</li>
     * <li>{@code args[1]} - the dataset key ({@code cul}, {@code flickr}, {@code bib},
     * {@code wiki}, {@code ml}, {@code lastfm}, {@code del}, {@code twitter_res},
     * {@code twitter_gen}), which selects the {@code <dataset>_core} directory</li>
     * <li>{@code args[2]} - the sample (file) name inside that directory</li>
     * </ul>
     * With fewer than three arguments, an unknown dataset or an unknown
     * operation, a message is printed and nothing is executed.
     *
     * @param args operation, dataset key and sample name (in that order)
     */
    public static void main(String[] args) {
        // Note: "of tags." needs its own line break, otherwise the copyright
        // line is glued directly to it in the printed banner.
        System.out.println(
                "TagRecommender:\n" + "A framework to implement and evaluate algorithms for the recommendation\n"
                        + "of tags.\n" + "Copyright (C) 2013 - 2015 Dominik Kowald\n\n"
                        + "This program is free software: you can redistribute it and/or modify\n"
                        + " it under the terms of the GNU Affero General Public License as published by\n"
                        + "the Free Software Foundation, either version 3 of the License, or\n"
                        + "(at your option) any later version.\n\n"
                        + "This program is distributed in the hope that it will be useful,\n"
                        + "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
                        + "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
                        + "GNU Affero General Public License for more details.\n\n"
                        + "You should have received a copy of the GNU Affero General Public License\n"
                        + "along with this program.  If not, see <http://www.gnu.org/licenses/>.\n"
                        + "-----------------------------------------------------------------------------\n\n");

        // Defaults for the manual test calls below; they are only referenced by
        // the commented-out method calls and are kept so those can be re-enabled.
        String dir = DATASET + "_core" + SUBDIR + "/";
        String path = dir + DATASET + "_sample";
        String networkFileName = "./data/csv/" + dir + "network.txt";
        String solrServerNameWithPort = ""; // only necessary when solr core is used

        // Method Testing -> just uncomment the methods you want to test
        // Test the BLL and BLL+MP_r algorithms (= baseline to beat :))
        // startActCalculator(dir, path, 1, 0.5, null, -5, false,
        // CalculationType.NONE, false);

        // Test the BLL_AC and BLL_AC+MP_r algorithms (could take a while)
        // startActCalculator(dir, path, 1, -5, -5, false,
        // CalculationType.USER_TO_RESOURCE_ONLY, false);
        // startActCalculator(dir, path, 1, 0.5, null, -5, true,
        // CalculationType.USER_TO_RESOURCE, false);

        // Test the MR approach
        // startRecCalculator(dir, path);

        // Test the GIRP and GIRPTM algorithms
        // startGirpCalculator(dir, path, true);

        // Test the MP_u, MP_r and MP_u_r algorithms
        // startModelCalculator(dir, path, 1, -5, true);

        // Test the MP algorithm
        // startBaselineCalculator(dir, path, 1, true);

        // Test the CF_u, CF_r and CF_u_r algorithms with 20 neighbors (change
        // it if you want)
        // startCfTagCalculator(dir, path, 1, 20, -5, false);

        // Test the PR and FR algorithms
        // startFolkRankCalculator(dir, path, 1);

        // Test the LDA algorithm with 1000 topics (change it if you want)
        // startLdaCalculator(dir, path, 1000, 1, false);

        // Test the 3L algorithm
        // start3LayersJavaCalculator(dir, path, "", 1, -5, -5, true, false,
        // false);

        // Test the 3L_tag algorithm
        // start3LayersJavaCalculator(dir, path, "", 1, -5, -5, true, true,
        // false);

        // Test the 3LT_topic algorithm
        // start3LayersJavaCalculator(dir, path, "", 1, -5, -5, true, false,
        // true);

        // Commandline Arguments
        if (args.length < 3) {
            System.out.println("Too few arguments!");
            return;
        }

        String subdir = "/";
        String op = args[0];
        String samplePath = "", sampleDir = "", sampleNetwork = "";
        int sampleCount = 1;

        // Map the dataset key onto its core directory (and twitter sub-directory).
        if (args[1].equals("cul")) {
            sampleDir = "cul_core";
        } else if (args[1].equals("flickr")) {
            sampleDir = "flickr_core";
        } else if (args[1].equals("bib")) {
            sampleDir = "bib_core";
        } else if (args[1].equals("wiki")) {
            sampleDir = "wiki_core";
        } else if (args[1].equals("ml")) {
            sampleDir = "ml_core";
        } else if (args[1].equals("lastfm")) {
            sampleDir = "lastfm_core";
        } else if (args[1].equals("del")) {
            sampleDir = "del_core";
        } else if (args[1].equals("twitter_res")) {
            sampleDir = "twitter_core";
            subdir = "/researchers";
        } else if (args[1].equals("twitter_gen")) {
            sampleDir = "twitter_core";
            subdir = "/general";
        } else {
            System.out.println("Dataset not available");
            return;
        }
        // With the default subdir "/" the directory ends with a slash, so the
        // derived paths contain "//"; this is kept unchanged because the
        // strings are handed on as sample names to the rest of the framework.
        sampleDir += subdir;
        samplePath += (sampleDir + "/" + args[2]);
        sampleNetwork = "./data/csv/" + sampleDir + "/network.txt";

        // Flickr and the twitter datasets are treated as narrow folksonomies;
        // the resource-based variants are switched off for them below.
        boolean narrowFolksonomy = args[1].equals("flickr") || args[1].contains("twitter");
        if (op.equals("cf")) {
            startCfTagCalculator(sampleDir, samplePath, sampleCount, 20, -5, false);
        } else if (op.equals("cfr")) {
            startCfTagCalculator(sampleDir, samplePath, sampleCount, 20, -5, !narrowFolksonomy);
        } else if (op.equals("fr")) {
            startFolkRankCalculator(sampleDir, samplePath, sampleCount);
        } else if (op.equals("bll_c")) {
            startActCalculator(sampleDir, samplePath, sampleCount, 0.5, null, -5, !narrowFolksonomy,
                    CalculationType.NONE, true);
        } else if (op.equals("bll_c_ac")) {
            if (!narrowFolksonomy) {
                startActCalculator(sampleDir, samplePath, sampleCount, 0.5, null, -5, true,
                        CalculationType.USER_TO_RESOURCE, true);
            } else {
                // Previously this case returned without any feedback.
                System.out.println("Operation bll_c_ac is not available for narrow folksonomies");
            }
        } else if (op.equals("girptm")) {
            startGirpCalculator(sampleDir, samplePath, !narrowFolksonomy);
        } else if (op.equals("mp_ur")) {
            startModelCalculator(sampleDir, samplePath, sampleCount, -5, !narrowFolksonomy);
        } else if (op.equals("mp")) {
            startBaselineCalculator(sampleDir, samplePath, sampleCount, true);
        } else if (op.equals("3layers")) {
            start3LayersJavaCalculator(sampleDir, samplePath, "", sampleCount, -5, -5, !narrowFolksonomy, false, false);
        } else if (op.equals("3LT")) {
            start3LayersJavaCalculator(sampleDir, samplePath, "", sampleCount, -5, -5, !narrowFolksonomy, true, false);
            start3LayersJavaCalculator(sampleDir, samplePath, "", sampleCount, -5, -5, !narrowFolksonomy, false, true);
        } else if (op.equals("lda")) {
            startLdaCalculator(sampleDir, samplePath, 1000, sampleCount, !narrowFolksonomy);
        } else if (op.equals("lda_samples")) {
            createLdaSamples(samplePath, sampleCount, 1000, true, false);
        } else if (op.equals("tensor_samples")) {
            writeTensorFiles(samplePath, true);
        } else if (op.equals("mymedialite_samples")) {
            writeTensorFiles(samplePath, false);
        } else if (op.equals("core")) {
            BookmarkSplitter.calculateCore(samplePath, samplePath, 3, 3, 3);
        } else if (op.equals("split_l1o")) {
            BookmarkSplitter.splitSample(samplePath, samplePath, sampleCount, 0, true, false, true, null, sampleNetwork);
        } else if (op.equals("split_8020")) {
            BookmarkSplitter.splitSample(samplePath, samplePath, sampleCount, 20, false, false, true, null, sampleNetwork);
        } else if (op.equals("percentage_sample")) {
            BookmarkSplitter.drawUserPercentageSample(samplePath, 3, 1);
        } else if (op.equals("process_bibsonomy")) {
            BibsonomyProcessor.processUnsortedFile(sampleDir, "tas", args[2]);
        } else if (op.equals("process_citeulike")) {
            CiteULikeProcessor.processFile("current", args[2]);
        } else if (op.equals("process_lastfm")) {
            LastFMProcessor.processFile("user_taggedartists-timestamps.dat", args[2]);
        } else if (op.equals("process_ml")) {
            MovielensProcessor.processFile("tags.dat", args[2], "ratings.dat");
        } else if (op.equals("process_del")) {
            PintsProcessor.processFile(sampleDir, "delicious", args[2]);
        } else if (op.equals("process_flickr")) {
            PintsProcessor.processFile(sampleDir, "flickr", args[2]);
        } else if (op.equals("item_mp")) {
            startBaselineCalculatorForResources(sampleDir, samplePath, sampleCount, false, false);
        } else if (op.equals("item_cft")) {
            startCfResourceCalculator(sampleDir, samplePath, sampleCount, 20, true, false, false, false, Features.TAGS,
                    false);
        } else if (op.equals("item_cfb")) {
            startCfResourceCalculator(sampleDir, samplePath, sampleCount, 20, true, false, false, false,
                    Features.ENTITIES, false);
        } else if (op.equals("item_cbt")) {
            TOPIC_NAME = "lda_500";
            startCfResourceCalculator(sampleDir, samplePath, 1, 20, false, true, false, false, Features.TOPICS, false);
        } else if (op.equals("item_zheng")) {
            startZhengResourceCalculator(sampleDir, samplePath, sampleCount);
        } else if (op.equals("item_huang")) {
            startHuangResourceCalculator(sampleDir, samplePath, sampleCount);
        } else if (op.equals("item_cirtt")) {
            startResourceCIRTTCalculator(sampleDir, samplePath, "", sampleCount, 20, Features.ENTITIES, false, true,
                    false, true);
        } else if (op.equals("item_sustain")) {
            startSustainApproach(sampleDir, samplePath, 2.845, 0.5, 6.396, 0.0936, 0, 0, 20, 0.5);
        } else if (op.equals("stats")) {
            try {
                getStatistics(samplePath, false);
            } catch (Exception e) {
                e.printStackTrace();
            }
        } else if (op.equals("hashtag_analysis")) {
            analysisSocial(sampleDir, samplePath, sampleNetwork, "all", null);
        } else if (op.equals("hashtag_hybrid")) {
            startSocialRecommendation(sampleDir, samplePath, sampleNetwork, "hybrid", 0.5, null, 0.5, null, null, null);
        } else if (op.equals("hashtag_socialmp")) {
            startSocialRecommendation(sampleDir, samplePath, sampleNetwork, "social_freq", 0.5, null, 0.5, null, null, null);
        } else if (op.equals("hashtag_socialbll")) {
            startSocialRecommendation(sampleDir, samplePath, sampleNetwork, "social", 0.5, null, 0.5, null, null, null);
        } else if (op.equals("hashtag_social_recency")) {
            startSocialRecommendation(sampleDir, samplePath, sampleNetwork, "social_recency", 0.5, null, 0.5, null,
                    null, null);
        } else if (op.equals("hashtag_cb_res")) {
            startSocialRecommendation(sampleDir, samplePath, sampleNetwork, "hybrid", 1.699, null, 1.242, null,
                    solrServerNameWithPort, "researcher");
        } else if (op.equals("hashtag_cb_gen")) {
            startSocialRecommendation(sampleDir, samplePath, sampleNetwork, "hybrid", 1.723, null, 1.269, null,
                    solrServerNameWithPort, "general");
        } else {
            System.out.println("Unknown operation");
        }
    }

    // Tag Recommenders methods
    // ---------------------------------------------------------------------------------------------------------------------------------------------
    /**
     * Runs the Solr-based hashtag prediction and writes the metrics for the
     * produced result file.
     *
     * @param sampleDir        directory of the sample; also used as prefix of the sample name
     * @param samplePath       not used by this method
     * @param solrUrl          URL of the Solr server
     * @param solrCore         name of the Solr core; appended to {@code sampleDir} as sample name
     * @param train            {@code true} to predict via {@code predictTrainSample},
     *                         {@code false} to predict via {@code predictSample}
     * @param hours            forwarded to {@code predictTrainSample}
     * @param mostRecentTweets forwarded to {@code predictTrainSample}
     */
    private static void startSolrHashtagCalculator(String sampleDir, String samplePath, String solrUrl, String solrCore,  boolean train, boolean hours, Integer mostRecentTweets) {
        final String resultSuffix;
        if (!train) {
            // using test set content!
            resultSuffix = SolrHashtagCalculator.predictSample(sampleDir, solrCore, solrUrl);
        } else {
            resultSuffix = SolrHashtagCalculator.predictTrainSample(sampleDir, solrCore, solrUrl, hours,
                    mostRecentTweets);
        }
        // Both modes evaluate the same way: one sample, k = 1..10.
        writeMetrics(sampleDir, sampleDir + "/" + solrCore, resultSuffix, 1, 10, null, null, null);
    }

    /**
     * Runs every tag recommender approach of this class once on the given sample.
     *
     * @param sampleDir  directory of the sample
     * @param samplePath path (name) of the sample
     * @param all        forwarded as the "all"/"resBased" flag of the individual start methods
     */
    private static void startAllTagRecommenderApproaches(String sampleDir, String samplePath, boolean all) {
        final int runs = 1;
        final int upperBound = -5;
        final double dValue = 0.5;

        // MP
        startBaselineCalculator(sampleDir, samplePath, runs, true);
        // MPur
        startModelCalculator(sampleDir, samplePath, runs, upperBound, all);
        // GIRPTM
        startGirpCalculator(sampleDir, samplePath, all);
        // BLL
        startActCalculator(sampleDir, samplePath, runs, dValue, null, upperBound, all, CalculationType.NONE, true);
        // BLLac
        startActCalculator(sampleDir, samplePath, runs, dValue, null, upperBound, all,
                CalculationType.USER_TO_RESOURCE, true);
        // 3L (neither tagBLL nor topicBLL)
        start3LayersJavaCalculator(sampleDir, samplePath, "", runs, upperBound, upperBound, all, false, false);
        // 3L with tagBLL switched on
        start3LayersJavaCalculator(sampleDir, samplePath, "", runs, upperBound, upperBound, all, true, false);
        // 3L with topicBLL switched on
        start3LayersJavaCalculator(sampleDir, samplePath, "", runs, upperBound, upperBound, all, false, true);
        // CFur
        startCfTagCalculator(sampleDir, samplePath, runs, 20, upperBound, false);
        // APR+FR
        startFolkRankCalculator(sampleDir, samplePath, runs);
        // LDA
        startLdaCalculator(sampleDir, samplePath, 1000, runs, all);
    }

    /**
     * Runs a reduced selection of tag recommender approaches (MPur, GIRPTM,
     * BLLac and FolkRank) once on the given sample.
     *
     * @param sampleDir  directory of the sample
     * @param samplePath path (name) of the sample
     * @param all        forwarded as the "all" flag of the individual start methods
     */
    private static void startSampleTagRecommenderApproaches(String sampleDir, String samplePath, boolean all) {
        final int runs = 1;
        final int betaUpperBound = -5;

        // MPur
        startModelCalculator(sampleDir, samplePath, runs, betaUpperBound, all);
        // GIRPTM
        startGirpCalculator(sampleDir, samplePath, all);
        // BLLac (without passing the reader on to the metrics)
        startActCalculator(sampleDir, samplePath, runs, 0.5, null, betaUpperBound, all,
                CalculationType.USER_TO_RESOURCE, false);
        // APR+FR
        startFolkRankCalculator(sampleDir, samplePath, runs);
    }

    /**
     * Runs the BLL calculator on the sample and writes the metrics of every run.
     * <p>
     * Per sample run the basic prediction (beta fixed to 5) is evaluated first;
     * if {@code all} is set, the combined variant is additionally evaluated for
     * every value returned by {@code getBetaValues(betaUpperBound)}.
     *
     * @param sampleDir      directory of the sample
     * @param sampleName     name of the sample
     * @param sampleCount    number of runs; also passed on to {@code writeMetrics}
     * @param dVal           d value handed to the BLL calculator and used in the result name
     * @param lambda         lambda handed to the BLL calculator (may be {@code null})
     * @param betaUpperBound bound used to create the list of beta values
     * @param all            whether the beta-dependent variants are calculated as well
     * @param type           calculation type; determines the result file prefix
     * @param allMetrics     whether the reader is passed on to {@code writeMetrics}
     */
    private static void startActCalculator(String sampleDir, String sampleName, int sampleCount, double dVal,
            Double lambda, int betaUpperBound, boolean all, CalculationType type, boolean allMetrics) {
        getTrainTestSize(sampleName);
        final List<Integer> betas = getBetaValues(betaUpperBound);

        final String acPart;
        if (type == CalculationType.USER_TO_RESOURCE) {
            acPart = "_ac";
        } else {
            acPart = "";
        }
        // Result name of the basic run depends only on the type and the d value.
        final String basePrefix = (type == CalculationType.USER_TO_RESOURCE_ONLY) ? "ac_5_5"
                : "bll" + acPart + "_5_" + dVal;

        for (int run = 0; run < sampleCount; run++) {
            BookmarkReader bllReader = BLLCalculator.predictSample(sampleName, TRAIN_SIZE, TEST_SIZE, true, false,
                    dVal, 5, type, lambda);
            writeMetrics(sampleDir, sampleName, basePrefix, sampleCount, 10, null, allMetrics ? bllReader : null,
                    null);
            if (!all) {
                continue;
            }
            for (int beta : betas) {
                bllReader = BLLCalculator.predictSample(sampleName, TRAIN_SIZE, TEST_SIZE, true, true, dVal, beta,
                        type, lambda);
                String combinedPrefix = "bll_c" + acPart + "_" + beta + "_" + dVal;
                writeMetrics(sampleDir, sampleName, combinedPrefix, sampleCount, 10, null,
                        allMetrics ? bllReader : null, null);
            }
        }
    }

    /**
     * Runs one or several social hashtag recommendation algorithms and writes
     * their metrics.
     * <p>
     * If both {@code solrUrl} and {@code solrCore} are given, content-based
     * predictions are loaded from
     * {@code ./data/results/<sampleDir>/<solrCore>_cbpredictions.ser} when that
     * file exists, otherwise they are calculated via Solr. For the algorithm
     * {@code "social_top_per_temp"} the resulting nDCG@10 value is appended
     * to {@code ./data/results/<sampleDir>/social_top_per_temp_etah_etal_ndcg.txt}.
     *
     * @param sampleDir        directory of the sample
     * @param sampleName       name of the sample
     * @param networkFilename  file with the social network
     * @param algo             algorithm to run; {@code null} runs the default list
     *                         ({@code social_freq}, {@code social}, {@code hybrid},
     *                         {@code social_recency}, {@code social_link_weight})
     * @param dIndividual      d value handed to the engine constructor
     * @param lambdaIndividual lambda handed to the engine constructor (may be {@code null})
     * @param dSocial          d value handed to {@code predictSample}
     * @param lambdaSocial     lambda handed to {@code predictSample} (may be {@code null})
     * @param solrUrl          URL of the Solr server, or {@code null} if not used
     * @param solrCore         name of the Solr core, or {@code null} if not used
     */
    private static void startSocialRecommendation(String sampleDir, String sampleName, String networkFilename,
            String algo, double dIndividual, Double lambdaIndividual, double dSocial, Double lambdaSocial,
            String solrUrl, String solrCore) {
        double betaBLL = 0.5;
        double betaCB = 0.3;

        String[] algos = null;
        if (algo == null) {
            algos = new String[] { "social_freq", "social", "hybrid", "social_recency", "social_link_weight" };
        } else {
            algos = new String[] { algo };
        }
        // Arrays.toString prints the algorithm names; concatenating the array
        // itself would only print its type and identity hash.
        System.out.println("algos >> " + Arrays.toString(algos));
        getTrainTestSize(sampleName);
        Map<Integer, Map<Integer, Double>> contentBasedValues = null;

        if (solrUrl != null && solrCore != null) {
            BookmarkReader reader = new BookmarkReader(0, false);
            reader.readFile(sampleName);
            String cbFileName = "./data/results/" + sampleDir + "/" + solrCore + "_cbpredictions.ser";
            if (new File(cbFileName).exists()) {
                System.out.println("Found cb file ...");
                contentBasedValues = SolrHashtagCalculator.deSerializeHashtagPrediction(cbFileName);
            } else {
                contentBasedValues = SolrHashtagCalculator.getNormalizedHashtagPredictions(sampleDir, solrCore, solrUrl,
                        reader, null);
                writeMetrics(sampleDir, sampleDir + "/" + solrCore, "solrht_normalized", 1, 10, null, null, null);
            }
            System.out.println("Number of content-based recommendations: " + contentBasedValues.size());
        }

        for (String a : algos) {
            System.out.println("Algorithm >> " + a);
            HashtagRecommendationEngine calculator = null;
            if ("social_link_weight".equals(a)) {
                String mentionFilename = "./data/csv/" + sampleDir + "/mentionNetwork.txt";
                String retweetFilename = "./data/csv/" + sampleDir + "/retweetNetwork.txt";
                String replyFilename = "./data/csv/" + sampleDir + "/replyNetwork.txt";
                System.out.println("Social init ... ");
                calculator = new HashtagRecommendationEngine(sampleDir, sampleName, networkFilename, mentionFilename,
                        retweetFilename, replyFilename, TRAIN_SIZE, TEST_SIZE, dIndividual, lambdaIndividual);
                System.out.println("Social init done ... ");
            } else if ("social_top_per_temp".equals(a)) {
                System.out.println("Solr Core >> " + solrCore);
                System.out.println("Solr Url >> " + solrUrl);
                calculator = new HashtagRecommendationEngine(sampleDir, sampleName, networkFilename, solrUrl, solrCore,
                        TRAIN_SIZE, TEST_SIZE, dIndividual, lambdaIndividual);
            } else if ("hybrid_link".equals(a)) {
                String mentionFilename = "./data/csv/" + sampleDir + "/mentionNetwork.txt";
                String retweetFilename = "./data/csv/" + sampleDir + "/retweetNetwork.txt";
                String replyFilename = "./data/csv/" + sampleDir + "/replyNetwork.txt";
                SocialStrengthCalculator socialStrengthCalculator = new SocialStrengthCalculator(mentionFilename,
                        retweetFilename, replyFilename);
                calculator = new HashtagRecommendationEngine(sampleDir, sampleName, networkFilename, TRAIN_SIZE,
                        TEST_SIZE, dIndividual, lambdaIndividual);
                calculator.setSocialStrengthCalculator(socialStrengthCalculator);
            } else {
                System.out.println("Social init ... ");
                calculator = new HashtagRecommendationEngine(sampleDir, sampleName, networkFilename, TRAIN_SIZE,
                        TEST_SIZE, dIndividual, lambdaIndividual);
                System.out.println("Social init done ... ");
            }

            if ("social_top_per_temp".equals(a)) {
                double eta_h = 0.1;
                double eta_l = 0.2;
                String suffix = "social_top_per_temp_" + eta_h + "_" + eta_l + "_" + a;
                DecimalFormat etaFormat = new DecimalFormat("##.##");
                System.out.println(" Pipeline >> eta_h " + etaFormat.format(eta_h) + " >> eta_l >> "
                        + etaFormat.format(eta_l));
                calculator.setEta_h(eta_h);
                calculator.setEta_l(eta_l);
                calculator.predictSample(betaBLL, betaCB, dSocial, lambdaSocial, a, null, suffix);
                writeMetrics(sampleDir, sampleName, suffix, 1, 10, null, null, null);
                double ndcg10 = MetricsCalculator.getNDCG10();
                String line = eta_h + ";" + eta_l + ";" + ndcg10 + "\n";
                System.out.println(" line >> " + line);

                // Append the result line to the nDCG log and always release the
                // file handle. Before, the writer was opened but neither
                // written to nor closed, so the log stayed empty.
                BufferedWriter bw = null;
                try {
                    bw = new BufferedWriter(new FileWriter(
                            "./data/results/" + sampleDir + "/social_top_per_temp_etah_etal_ndcg.txt", true));
                    bw.write(line);
                } catch (IOException e1) {
                    e1.printStackTrace();
                } finally {
                    if (bw != null) {
                        try {
                            bw.close();
                        } catch (IOException e2) {
                            e2.printStackTrace();
                        }
                    }
                }
            } else {
                String suffix = "social" + betaCB + "_" + dSocial + "_" + a;
                calculator.predictSample(betaBLL, betaCB, dSocial, lambdaSocial, a, contentBasedValues, suffix);
                writeMetrics(sampleDir, sampleName, suffix, 1, 10, null, null, null);
                System.out.println("Algorithm done >> " + a);
            }
        }
    }

    /**
     * Runs the combined CF / content-based (Solr) hashtag calculator and writes
     * the metrics for the result file {@code cf_cb_<beta>}.
     *
     * @param sampleDir  directory of the sample
     * @param sampleName name of the sample
     * @param beta       beta value handed to the calculator and used in the result name
     * @param solrUrl    URL of the Solr server
     * @param solrCore   name of the Solr core
     */
    private static void startCfCbHashtagCalculator(String sampleDir, String sampleName, double beta, String solrUrl,
            String solrCore) {
        getTrainTestSize(sampleName);
        CFSolrHashtagCalculator.predictSample(sampleDir, sampleName, TRAIN_SIZE, beta, solrUrl, solrCore);

        final String resultPrefix = "cf_cb_" + beta;
        writeMetrics(sampleDir, sampleName, resultPrefix, 1, 10, null, null, null);
    }

    /**
     * Runs the frequency/recency analysis of hashtag usage.
     *
     * @param sampleDir       directory of the sample
     * @param sampleName      name of the sample
     * @param networkFilename file with the social network
     * @param type            {@code "personal"}, {@code "social"} or {@code "all"} (personal
     *                        analysis followed by the social one); any other value does nothing
     * @param granularity     forwarded to the analysis classes (may be {@code null})
     */
    private static void analysisSocial(String sampleDir, String sampleName, String networkFilename, String type,
            Integer granularity) {
        getTrainTestSize(sampleName);
        final HashtagRecommendationEngine engine = new HashtagRecommendationEngine(sampleDir, sampleName,
                networkFilename, TRAIN_SIZE, TEST_SIZE, 0.5, null);

        final boolean runBoth = type.equals("all");
        final boolean runPersonal = runBoth || type.equals("personal");
        final boolean runSocial = runBoth || type.equals("social");

        if (runPersonal) {
            new ProcessFrequencyRecency().ProcessTagAnalytics(sampleDir, engine.getUserTagTimes(), granularity);
        }
        if (runSocial) {
            new ProcessFrequencyRecencySocial(sampleDir, engine.getUserTagTimes(), engine.getNetwork(), granularity);
        }
    }

    /**
     * Runs the GIRP calculator and writes its metrics under {@code girp};
     * if {@code all} is set, the second variant is calculated as well and
     * written under {@code girptm}.
     *
     * @param sampleDir  directory of the sample
     * @param sampleName name of the sample
     * @param all        whether the {@code girptm} variant is calculated too
     */
    private static void startGirpCalculator(String sampleDir, String sampleName, boolean all) {
        getTrainTestSize(sampleName);

        BookmarkReader girpReader = GIRPTMCalculator.predictSample(sampleName, TRAIN_SIZE, TEST_SIZE, true, false);
        writeMetrics(sampleDir, sampleName, "girp", 1, 10, null, girpReader, null);
        if (!all) {
            return;
        }

        BookmarkReader girptmReader = GIRPTMCalculator.predictSample(sampleName, TRAIN_SIZE, TEST_SIZE, true, true);
        writeMetrics(sampleDir, sampleName, "girptm", 1, 10, null, girptmReader, null);
    }

    /**
     * Runs the MPur calculator and writes the metrics under {@code mp_u_5};
     * with {@code all} set, also under {@code mp_r_5} and, for every beta
     * value, under {@code mp_ur_<beta>}.
     *
     * @param sampleDir      directory of the sample
     * @param sampleName     name of the sample
     * @param sampleCount    number of runs; also passed on to {@code writeMetrics}
     * @param betaUpperBound bound used to create the list of beta values
     * @param all            whether the additional variants are calculated as well
     */
    private static void startModelCalculator(String sampleDir, String sampleName, int sampleCount, int betaUpperBound,
            boolean all) {
        getTrainTestSize(sampleName);
        final List<Integer> betas = getBetaValues(betaUpperBound);
        BookmarkReader lastReader = null;

        for (int run = 0; run < sampleCount; run++) {
            lastReader = MPurCalculator.predictSample(sampleName, TRAIN_SIZE, TEST_SIZE, true, false, 5);
            if (all) {
                lastReader = MPurCalculator.predictSample(sampleName, TRAIN_SIZE, TEST_SIZE, false, true, 5);
            }
        }
        writeMetrics(sampleDir, sampleName, "mp_u_5", sampleCount, 10, null, lastReader, null);

        // Everything below is only calculated when all variants are requested.
        if (!all) {
            return;
        }
        writeMetrics(sampleDir, sampleName, "mp_r_5", sampleCount, 10, null, lastReader, null);
        for (int beta : betas) {
            for (int run = 0; run < sampleCount; run++) {
                lastReader = MPurCalculator.predictSample(sampleName, TRAIN_SIZE, TEST_SIZE, true, true, beta);
            }
            writeMetrics(sampleDir, sampleName, "mp_ur_" + beta, sampleCount, 10, null, lastReader, null);
        }
    }

    /**
     * Runs the CF tag recommender and writes the metrics under
     * {@code usercf_5}; with {@code all} set, also under {@code rescf_5}
     * and, for every beta value, under {@code cf_<beta>}.
     *
     * @param sampleDir      directory of the sample
     * @param sampleName     name of the sample
     * @param sampleCount    number of runs; also passed on to {@code writeMetrics}
     * @param neighbors      neighborhood size handed to the recommender
     * @param betaUpperBound bound used to create the list of beta values
     * @param all            whether the additional variants are calculated as well
     */
    private static void startCfTagCalculator(String sampleDir, String sampleName, int sampleCount, int neighbors,
            int betaUpperBound, boolean all) {
        getTrainTestSize(sampleName);
        final List<Integer> betas = getBetaValues(betaUpperBound);
        BookmarkReader lastReader = null;

        for (int run = 0; run < sampleCount; run++) {
            lastReader = CFTagRecommender.predictTags(sampleName, TRAIN_SIZE, TEST_SIZE, neighbors, true, false, 5);
            if (all) {
                lastReader = CFTagRecommender.predictTags(sampleName, TRAIN_SIZE, TEST_SIZE, neighbors, false, true,
                        5);
            }
        }
        writeMetrics(sampleDir, sampleName, "usercf_5", sampleCount, 10, null, lastReader, null);

        // Everything below is only calculated when all variants are requested.
        if (!all) {
            return;
        }
        writeMetrics(sampleDir, sampleName, "rescf_5", sampleCount, 10, null, lastReader, null);
        for (int beta : betas) {
            for (int run = 0; run < sampleCount; run++) {
                lastReader = CFTagRecommender.predictTags(sampleName, TRAIN_SIZE, TEST_SIZE, neighbors, true, true,
                        beta);
            }
            writeMetrics(sampleDir, sampleName, "cf_" + beta, sampleCount, 10, null, lastReader, null);
        }
    }

    /**
     * Runs the FolkRank calculator {@code size} times and writes the metrics
     * for the two result files {@code fr} and {@code apr}.
     *
     * @param sampleDir  directory of the sample
     * @param sampleName name of the sample
     * @param size       number of runs; also passed on to {@code writeMetrics}
     */
    private static void startFolkRankCalculator(String sampleDir, String sampleName, int size) {
        getTrainTestSize(sampleName);

        BookmarkReader lastReader = null;
        for (int run = 0; run < size; run++) {
            lastReader = FolkRankCalculator.predictSample(sampleName, TRAIN_SIZE, TEST_SIZE);
        }

        // Both result files are evaluated with the reader of the last run.
        for (String prefix : new String[] { "fr", "apr" }) {
            writeMetrics(sampleDir, sampleName, prefix, size, 10, null, lastReader, null);
        }
    }

    /**
     * Runs the most-popular-tags baseline {@code size} times and writes the
     * metrics under {@code mp}.
     *
     * @param sampleDir  directory of the sample
     * @param sampleName name of the sample
     * @param size       number of runs; also passed on to {@code writeMetrics}
     * @param mp         flag forwarded to {@code MPCalculator.predictPopularTags}
     */
    private static void startBaselineCalculator(String sampleDir, String sampleName, int size, boolean mp) {
        getTrainTestSize(sampleName);

        BookmarkReader lastReader = null;
        for (int run = 0; run < size; run++) {
            lastReader = MPCalculator.predictPopularTags(sampleName, TRAIN_SIZE, TEST_SIZE, mp);
        }

        writeMetrics(sampleDir, sampleName, "mp", size, 10, null, lastReader, null);
    }

    /**
     * Runs the Mallet (LDA) calculator {@code sampleCount} times and writes
     * the metrics under {@code lda_<topics>}.
     *
     * @param sampleDir   directory of the sample
     * @param sampleName  name of the sample
     * @param topics      number of topics; also part of the result name
     * @param sampleCount number of runs; also passed on to {@code writeMetrics}
     * @param all         flag forwarded to {@code MalletCalculator.predictSample}
     */
    private static void startLdaCalculator(String sampleDir, String sampleName, int topics, int sampleCount,
            boolean all) {
        getTrainTestSize(sampleName);

        BookmarkReader lastReader = null;
        for (int run = 0; run < sampleCount; run++) {
            lastReader = MalletCalculator.predictSample(sampleName, TRAIN_SIZE, TEST_SIZE, topics, true, all);
        }

        final String resultPrefix = "lda_" + topics;
        writeMetrics(sampleDir, sampleName, resultPrefix, sampleCount, 10, null, lastReader, null);
    }

    /**
     * Runs the 3Layers calculator for every d value and writes the metrics of
     * each configuration.
     * <p>
     * The result prefix is {@code layers}, extended by {@code bll},
     * {@code tagbll} or {@code topicbll} depending on the two BLL flags. For
     * every d value the user-based variant (beta fixed to 5) is written under
     * {@code user<prefix>_5_<d>}; if {@code resBased} is set, the variant for
     * every beta value is written under {@code <prefix>_<beta>_<d>} before.
     *
     * @param sampleDir      directory of the sample
     * @param sampleName     name of the sample
     * @param topicString    topic postfix appended to the sample name; empty for none
     * @param size           number of runs; also passed on to {@code writeMetrics}
     * @param dUpperBound    bound used to create the list of d values
     * @param betaUpperBound bound used to create the list of beta values
     * @param resBased       whether the beta-dependent variants are calculated as well
     * @param tagBLL         flag forwarded to the calculator; selects the {@code tagbll} prefix
     * @param topicBLL       flag forwarded to the calculator; selects the {@code topicbll} prefix
     */
    private static void start3LayersJavaCalculator(String sampleDir, String sampleName, String topicString, int size,
            int dUpperBound, int betaUpperBound, boolean resBased, boolean tagBLL, boolean topicBLL) {
        getTrainTestSize(sampleName);
        final List<Integer> dList = getBetaValues(dUpperBound);
        final List<Integer> betaList = getBetaValues(betaUpperBound);

        final String variant = tagBLL ? (topicBLL ? "bll" : "tagbll") : (topicBLL ? "topicbll" : "");
        final String prefix = "layers" + variant;

        for (int run = 0; run < size; run++) {
            for (int d : dList) {
                // Sample name and metrics postfix only depend on the topic string.
                final boolean hasTopic = !topicString.isEmpty();
                final String inputName = hasTopic ? sampleName + "_" + topicString : sampleName;
                final String postfix = hasTopic ? topicString : null;

                BookmarkReader reader;
                if (resBased) {
                    for (int beta : betaList) {
                        reader = ThreeLTCalculator.predictSample(inputName, TRAIN_SIZE, TEST_SIZE, d, beta, true,
                                true, tagBLL, topicBLL, CalculationType.NONE);
                        writeMetrics(sampleDir, sampleName, prefix + "_" + beta + "_" + d, size, 10, postfix, reader,
                                null);
                    }
                }
                reader = ThreeLTCalculator.predictSample(inputName, TRAIN_SIZE, TEST_SIZE, d, 5, true, false, tagBLL,
                        topicBLL, CalculationType.NONE);
                writeMetrics(sampleDir, sampleName, "user" + prefix + "_5_" + d, size, 10, postfix, reader, null);
            }
        }
    }

    /**
     * Runs the content-based calculator once and writes the metrics under
     * {@code cb}.
     *
     * @param sampleDir  directory of the sample
     * @param sampleName name of the sample
     */
    private static void startContentBasedCalculator(String sampleDir, String sampleName) {
        getTrainTestSize(sampleName);
        final BookmarkReader cbReader;
        cbReader = ContentBasedCalculator.predictSample(sampleName, TRAIN_SIZE, TEST_SIZE);
        final int runs = 1;
        final int maxK = 10;
        writeMetrics(sampleDir, sampleName, "cb", runs, maxK, null, cbReader, null);
    }

    // Helpers
    // -----------------------------------------------------------------------------------------------------------------------------------------------------------
    /**
     * Calls {@code MalletCalculator.createSample} {@code size} times for the
     * given sample, after refreshing the train/test sizes.
     *
     * @param sampleName                 name of the sample
     * @param size                       number of times the sample creation is invoked
     * @param topics                     number of topics; narrowed to {@code short} before being passed on
     * @param tagrec                     forwarded unchanged to {@code createSample}
     * @param personalizedTopicCreation  forwarded unchanged to {@code createSample}
     */
    private static void createLdaSamples(String sampleName, int size, int topics, boolean tagrec,
            boolean personalizedTopicCreation) {
        getTrainTestSize(sampleName);
        final short topicCount = (short) topics;
        for (int run = 0; run < size; run++) {
            MalletCalculator.createSample(sampleName, topicCount, tagrec, TRAIN_SIZE, personalizedTopicCreation);
        }
    }

    /**
     * Writes the tensor files for a sample through {@code TensorProcessor.writeFiles},
     * using the current user-bookmark bounds and, when DESCRIBER is set, a
     * categorizer/describer filter.
     *
     * @param sampleName  name of the sample
     * @param tagRec      forwarded unchanged to {@code TensorProcessor.writeFiles}
     */
    private static void writeTensorFiles(String sampleName, boolean tagRec) {
        getTrainTestSize(sampleName);

        // A filter is only built when DESCRIBER has been configured.
        CatDescFiltering filter;
        if (DESCRIBER == null) {
            filter = null;
        } else {
            filter = CatDescFiltering.instantiate(sampleName, TRAIN_SIZE);
            filter.setDescriber(DESCRIBER.booleanValue());
        }

        TensorProcessor.writeFiles(sampleName, TRAIN_SIZE, TEST_SIZE, tagRec, MIN_USER_BOOKMARKS, MAX_USER_BOOKMARKS,
                filter);
    }

    /**
     * Computes and writes tag-recommendation metrics for every cut-off from 1 to
     * {@code k}.
     *
     * For each cut-off, {@code MetricsCalculator.calculateMetrics} is invoked
     * {@code sampleCount} times and the averages are then written with
     * {@code MetricsCalculator.writeAverageMetrics}. The metrics state is reset
     * once all cut-offs are done.
     *
     * @param sampleDir    directory prefix of the metrics output file
     * @param sampleName   base name of the sample
     * @param prefix       algorithm identifier used in the prediction and metrics file names
     * @param sampleCount  number of samples to average over
     * @param k            largest cut-off to evaluate
     * @param posfix       optional topic posfix; {@code null} or "0" means no posfix
     * @param reader       bookmark reader handed to the metrics calculator
     * @param trainSize    forwarded unchanged to the metrics calculator; may be {@code null}
     */
    private static void writeMetrics(String sampleDir, String sampleName, String prefix, int sampleCount, int k,
            String posfix, BookmarkReader reader, Integer trainSize) {
        CatDescFiltering filter = null;
        if (DESCRIBER != null) {
            filter = CatDescFiltering.instantiate(sampleName, TRAIN_SIZE);
            filter.setDescriber(DESCRIBER.booleanValue());
        }

        // Compare by value: "==" on Strings only matches the identical object, so a
        // "0" built at runtime (e.g. parsed from arguments) was not recognised.
        String topicString = (posfix == null || "0".equals(posfix)) ? "" : "_" + posfix;
        String predictionName = sampleName + topicString + "_" + prefix;
        String metricsPath = sampleDir + "/" + prefix + topicString + "_metrics";

        for (int i = 1; i <= k; i++) {
            for (int j = 1; j <= sampleCount; j++) {
                MetricsCalculator.calculateMetrics(predictionName, i, metricsPath, false, reader, MIN_USER_BOOKMARKS,
                        MAX_USER_BOOKMARKS, MIN_RESOURCE_BOOKMARKS, MAX_RESOURCE_BOOKMARKS, filter, true, trainSize);
            }
            MetricsCalculator.writeAverageMetrics(metricsPath, i, (double) sampleCount, true, i == k, DESCRIBER);
        }
        MetricsCalculator.resetMetrics();
    }

    /**
     * Expands an upper bound into the list of integer values to evaluate.
     *
     * A negative bound selects exactly one value, its absolute value (e.g. -5
     * yields [5]). A non-negative bound selects every integer from 1 up to and
     * including the bound (e.g. 2 yields [1, 2]; 0 yields an empty list).
     *
     * NOTE(review): the earlier comment described the results as tenths (5 as
     * 0.5, 1 and 2 as 0.1 and 0.2). No such scaling happens here; presumably the
     * consumer of these values applies it - confirm.
     */
    private static List<Integer> getBetaValues(int betaUpperBound) {
        List<Integer> values = new ArrayList<Integer>();
        if (betaUpperBound < 0) {
            // Negative bound: a single fixed value instead of a range.
            values.add(-betaUpperBound);
            return values;
        }
        int next = 1;
        while (next <= betaUpperBound) {
            values.add(next);
            next++;
        }
        return values;
    }

    /**
     * Prints statistics for the full dataset and for its "_train" and "_test"
     * files, each under its own header line. Per-user statistics files are not
     * written.
     *
     * @param dataset  base name of the dataset
     */
    private static void getTrainTestStatistics(String dataset) {
        String[] headers = { "FULL SET -----", "TRAIN SET -----", "TEST SET -----" };
        String[] datasets = { dataset, dataset + "_train", dataset + "_test" };
        for (int part = 0; part < headers.length; part++) {
            System.out.println(headers[part]);
            getStatistics(datasets[part], false);
        }
    }

    /**
     * Prints summary statistics of a dataset to standard output, writes the
     * user tag distribution, and optionally writes per-user statistics to
     * {@code ./data/metrics/<dataset>_userStats.txt}.
     *
     * If TOPIC_NAME is set, "_" + TOPIC_NAME is appended to the dataset name
     * before the file is read.
     *
     * @param dataset   name of the dataset to read
     * @param writeAll  whether the per-user statistics file and the topic averages are produced
     */
    private static void getStatistics(String dataset, boolean writeAll) {
        // Remember the caller-supplied name: getTrainTestSize appends TOPIC_NAME on its own,
        // so passing the already-suffixed name would add the topic twice.
        String sampleName = dataset;
        if (TOPIC_NAME != null) {
            dataset += ("_" + TOPIC_NAME);
        }
        BookmarkReader reader = new BookmarkReader(0, false);
        reader.readFile(dataset);

        int bookmarks = reader.getBookmarks().size();
        System.out.println("Posts: " + bookmarks);
        int users = reader.getUsers().size();
        System.out.println("Users: " + users);
        int resources = reader.getResources().size();
        System.out.println("Resources: " + resources);
        int tags = reader.getTags().size();
        System.out.println("Tags: " + tags);
        int tagAssignments = reader.getTagAssignmentsCount();
        System.out.println("Tag-Assignments: " + tagAssignments);
        int categories = reader.getCategories().size();
        System.out.println("Topics: " + categories);
        double avgTASPerPost = (double) tagAssignments / bookmarks;
        System.out.println("Avg. TAS per post: " + avgTASPerPost);
        double avgBookmarksPerUser = (double) bookmarks / users;
        System.out.println("Avg. resources/posts per user: " + avgBookmarksPerUser);
        double avgBookmarksPerResource = (double) bookmarks / resources;
        System.out.println("Avg. users/posts per resource: " + avgBookmarksPerResource);

        System.out.println("First timestamp: " + reader.getFirstTimestamp().toString());
        System.out.println("Last timestamp: " + reader.getLastTimestamp().toString());

        // write user distribution
        UserTagDistribution.calculate(reader, dataset);
        if (writeAll) {
            try {
                getTrainTestSize(sampleName);
                BufferedWriter userBW = new BufferedWriter(
                        new FileWriter(new File("./data/metrics/" + dataset + "_userStats.txt")));
                try {
                    userBW.write("UserID| NoOfResources| NoOfTopics| Topic-Similarity\n");
                    List<Bookmark> trainList = reader.getBookmarks().subList(0, TRAIN_SIZE);
                    List<Integer> testUsers = reader.getUniqueUserListFromTestSet(TRAIN_SIZE);
                    System.out.println();

                    double avgTopicsPerUser = 0.0;
                    double avgTopicDiversityPerUser = 0.0;
                    List<Map<Integer, Double>> userTopics = Utilities.getRelativeTopicMaps(trainList, false);
                    List<List<Bookmark>> userBookmarks = Utilities.getBookmarks(trainList, false);
                    for (int userID : testUsers) {
                        Map<Integer, Double> topicsOfUser = userTopics.get(userID);
                        double topicDiversityOfUser = Bookmark.getBookmarkDiversity(userBookmarks.get(userID));
                        userBW.write(userID + "| " + reader.getUserCounts().get(userID) + "| "
                                + topicsOfUser.keySet().size() + "| " + topicDiversityOfUser + "\n");
                        avgTopicsPerUser += topicsOfUser.keySet().size();
                        avgTopicDiversityPerUser += topicDiversityOfUser;
                    }
                    System.out.println("Avg. topics per user: " + avgTopicsPerUser / testUsers.size());
                    System.out.println(
                            "Avg. topic-similarity per user: " + avgTopicDiversityPerUser / testUsers.size());
                    double avgTopicsPerResource = Bookmark.getAvgNumberOfTopics(trainList);
                    System.out.println("Avg. topics per resource: " + avgTopicsPerResource);
                    userBW.flush();
                } finally {
                    // Close the file even when a lookup or write above throws,
                    // so the file handle is never leaked.
                    userBW.close();
                }
            } catch (IOException e) {
                System.out.println(e.getMessage());
            }
        }

        System.out.println();
    }

    /**
     * Reads the "_train" and "_test" files of a sample and stores their bookmark
     * counts in TRAIN_SIZE and TEST_SIZE, printing both values.
     *
     * If TOPIC_NAME is set, "_" + TOPIC_NAME is appended to the sample name
     * before the "_train" / "_test" suffix, so callers should pass the name
     * without the topic.
     *
     * @param sample  base name of the sample
     */
    private static void getTrainTestSize(String sample) {
        String baseName = (TOPIC_NAME == null) ? sample : sample + ("_" + TOPIC_NAME);

        BookmarkReader trainPart = new BookmarkReader(-1, false);
        trainPart.readFile(baseName + "_train");
        TRAIN_SIZE = trainPart.getBookmarks().size();
        System.out.println("Train-size: " + TRAIN_SIZE);

        BookmarkReader testPart = new BookmarkReader(-1, false);
        testPart.readFile(baseName + "_test");
        TEST_SIZE = testPart.getBookmarks().size();
        System.out.println("Test-size: " + TEST_SIZE);
    }

    /**
     * Evaluates existing prediction files for a sample, writing either tag
     * metrics (cut-offs 1..10) or resource metrics (cut-offs 1..20).
     *
     * When no reader is supplied, the train/test sizes are refreshed and a new
     * reader is created over the sample file (with "_" + postfix appended when
     * postfix is non-null).
     *
     * @param sampleDir   directory prefix for the metrics output
     * @param sampleName  base name of the sample
     * @param prefix      algorithm identifier used in the file names
     * @param postfix     optional posfix of the sample; may be {@code null}
     * @param calcTags    {@code true} for tag metrics, {@code false} for resource metrics
     * @param tensor      when {@code true}, TRAIN_SIZE is passed on as the train size;
     *                    otherwise {@code null} is passed
     * @param reader      reader to evaluate with, or {@code null} to create one
     */
    private static void evaluate(String sampleDir, String sampleName, String prefix, String postfix, boolean calcTags,
            boolean tensor, BookmarkReader reader) {
        BookmarkReader activeReader = reader;
        if (activeReader == null) {
            String fileName = sampleName + (postfix == null ? "" : "_" + postfix);
            getTrainTestSize(fileName);
            activeReader = new BookmarkReader(TRAIN_SIZE, false);
            activeReader.readFile(fileName);
        }

        Integer trainSize = tensor ? TRAIN_SIZE : null;
        if (!calcTags) {
            writeMetricsForResources(sampleDir, sampleName, prefix, 1, 20, postfix, activeReader, trainSize);
            return;
        }
        writeMetrics(sampleDir, sampleName, prefix, 1, 10, postfix, activeReader, trainSize);
    }

    // Item Recommendation
    // ------------------------------------------------------------------------------------------------------------------------------------
    /**
     * Runs the baseline resource recommender (random or most-popular)
     * {@code size} times and writes resource metrics for cut-offs 1..20 under
     * the prefix "rand" or "mp".
     *
     * @param sampleDir   directory prefix for the metrics output
     * @param sampleName  base name of the sample, without the topic suffix
     * @param size        number of prediction runs, also used as the sample count for averaging
     * @param random      {@code true} for random resources, {@code false} for popular resources
     * @param writeTime   forwarded unchanged to the predictor
     */
    private static void startBaselineCalculatorForResources(String sampleDir, String sampleName, int size,
            boolean random, boolean writeTime) {
        BookmarkReader reader = null;
        String posfix = "";
        if (TOPIC_NAME != null) {
            posfix = "_" + TOPIC_NAME;
        }
        for (int i = 1; i <= size; i++) {
            // getTrainTestSize appends TOPIC_NAME itself; passing sampleName + posfix
            // made it look for "<sample>_<topic>_<topic>_train".
            getTrainTestSize(sampleName);
            if (random) {
                reader = MPResourceCalculator.predictRandomResources(sampleName + posfix, TRAIN_SIZE, writeTime);
            } else {
                reader = MPResourceCalculator.predictPopularResources(sampleName + posfix, TRAIN_SIZE, writeTime);
            }
        }
        writeMetricsForResources(sampleDir, sampleName, random ? "rand" : "mp", size, 20, TOPIC_NAME, reader, null);
    }

    /**
     * Runs {@code CIRTTCalculator.predictSample} {@code size} times and writes
     * resource metrics for cut-offs 1..20 under the prefix
     * {@code "r3l_" + features} (plus "_bll" when {@code bll} is set).
     *
     * @param sampleDir       directory prefix for the metrics output
     * @param sampleName      base name of the sample, without the topic suffix
     * @param topicString     if non-empty, appended to the sample name with "_" for the
     *                        prediction and passed on as the metrics posfix; must not be null
     * @param size            number of prediction runs, also used as the sample count
     * @param neighborSize    forwarded unchanged to the predictor
     * @param features        forwarded to the predictor; also part of the output prefix
     * @param userSim         forwarded unchanged to the predictor
     * @param bll             forwarded to the predictor; also adds "_bll" to the output prefix
     * @param novelty         forwarded unchanged to the predictor
     * @param calculateOnTag  forwarded unchanged to the predictor
     */
    private static void startResourceCIRTTCalculator(String sampleDir, String sampleName, String topicString, int size,
            int neighborSize, Features features, boolean userSim, boolean bll, boolean novelty,
            boolean calculateOnTag) {
        BookmarkReader reader = null;
        String suffix = "r3l_" + features;
        if (bll) {
            suffix += "_bll";
        }
        for (int i = 1; i <= size; i++) {
            // getTrainTestSize appends TOPIC_NAME itself, so the plain sample name is
            // passed; pre-appending it here duplicated the topic in the file name.
            getTrainTestSize(sampleName);
            reader = CIRTTCalculator.predictSample(sampleName + (!topicString.isEmpty() ? "_" + topicString : ""),
                    TRAIN_SIZE, TEST_SIZE, neighborSize, features, userSim, bll, novelty, calculateOnTag);
        }
        writeMetricsForResources(sampleDir, sampleName, suffix, size, 20, !topicString.isEmpty() ? topicString : null,
                reader, null);
    }

    /**
     * Runs {@code ZhengCalculator.predictSample} {@code size} times and writes
     * resource metrics for cut-offs 1..20 under the prefix "zheng_tagtime".
     *
     * @param sampleDir   directory prefix for the metrics output
     * @param sampleName  base name of the sample, without the topic suffix
     * @param size        number of prediction runs, also used as the sample count
     */
    private static void startZhengResourceCalculator(String sampleDir, String sampleName, int size) {
        BookmarkReader reader = null;
        String posfix = "";
        if (TOPIC_NAME != null) {
            posfix = "_" + TOPIC_NAME;
        }
        for (int i = 1; i <= size; i++) {
            // getTrainTestSize appends TOPIC_NAME itself; passing the suffixed name
            // would add the topic twice to the train/test file names.
            getTrainTestSize(sampleName);
            reader = ZhengCalculator.predictSample(sampleName + posfix, TRAIN_SIZE);
        }
        writeMetricsForResources(sampleDir, sampleName, "zheng_tagtime", size, 20, TOPIC_NAME, reader, null);
    }

    /**
     * Runs {@code HuangCalculator.predictSample} {@code size} times and writes
     * resource metrics for cut-offs 1..20 under the prefix "huang_tag_user".
     *
     * @param sampleDir   directory prefix for the metrics output
     * @param sampleName  base name of the sample, without the topic suffix
     * @param size        number of prediction runs, also used as the sample count
     */
    private static void startHuangResourceCalculator(String sampleDir, String sampleName, int size) {
        BookmarkReader reader = null;
        String posfix = "";
        if (TOPIC_NAME != null) {
            posfix = "_" + TOPIC_NAME;
        }
        for (int i = 1; i <= size; i++) {
            // getTrainTestSize appends TOPIC_NAME itself; passing the suffixed name
            // would add the topic twice to the train/test file names.
            getTrainTestSize(sampleName);
            reader = HuangCalculator.predictSample(sampleName + posfix, TRAIN_SIZE);
        }
        writeMetricsForResources(sampleDir, sampleName, "huang_tag_user", size, 20, TOPIC_NAME, reader, null);
    }

    /**
     * Runs {@code CFResourceCalculator.predictResources} {@code size} times and
     * writes resource metrics for cut-offs 1..20.
     *
     * The output prefix is assembled from the flags: "rescf_" when not
     * user-based, otherwise "usercf_" when not resource-based, otherwise "cf_";
     * then "mixed_" when neither user-based nor all-resources; then "bll_" when
     * {@code bll} is set; then the features value, "_" and a fixed "5".
     *
     * @param sampleDir     directory prefix for the metrics output
     * @param sampleName    base name of the sample, without the topic suffix
     * @param size          number of prediction runs, also used as the sample count
     * @param neighborSize  forwarded unchanged to the predictor
     * @param userBased     forwarded to the predictor; also selects the output prefix
     * @param resBased      forwarded to the predictor; also selects the output prefix
     * @param allResources  forwarded to the predictor; also selects the output prefix
     * @param bll           forwarded to the predictor; also selects the output prefix
     * @param features      forwarded to the predictor; also part of the output prefix
     * @param writeTime     forwarded unchanged to the predictor
     */
    private static void startCfResourceCalculator(String sampleDir, String sampleName, int size, int neighborSize,
            boolean userBased, boolean resBased, boolean allResources, boolean bll, Features features,
            boolean writeTime) {
        String topicPart = (TOPIC_NAME == null) ? "" : "_" + TOPIC_NAME;

        StringBuilder name = new StringBuilder();
        if (!userBased) {
            name.append("rescf_");
            if (!allResources) {
                name.append("mixed_");
            }
        } else if (!resBased) {
            name.append("usercf_");
        } else {
            name.append("cf_");
        }
        if (bll) {
            name.append("bll_");
        }
        name.append(features).append("_");

        BookmarkReader lastReader = null;
        for (int run = 0; run < size; run++) {
            getTrainTestSize(sampleName);
            lastReader = CFResourceCalculator.predictResources(sampleName + topicPart, TRAIN_SIZE, TEST_SIZE,
                    neighborSize, userBased, resBased, allResources, bll, features, writeTime);
        }
        writeMetricsForResources(sampleDir, sampleName, name.toString() + "5", size, 20, TOPIC_NAME, lastReader,
                null);
    }

    /**
     * Builds a {@code SustainCalculator} for the sample, runs its resource
     * prediction once with the given parameters, and writes resource metrics for
     * cut-offs 1..20 under the prefix "sustain", passing TRAIN_SIZE as the
     * train size.
     *
     * @param sampleDir        directory prefix for the metrics output
     * @param sampleName       name of the sample
     * @param r                forwarded unchanged to {@code predictResources}
     * @param tau              forwarded unchanged to {@code predictResources}
     * @param beta             forwarded unchanged to {@code predictResources}
     * @param learning_rate    forwarded unchanged to {@code predictResources}
     * @param trainingRecency  forwarded unchanged to {@code predictResources}
     * @param candidateNumber  forwarded unchanged to {@code predictResources}
     * @param sampleSize       forwarded unchanged to {@code predictResources}
     * @param cfWeight         forwarded unchanged to {@code predictResources}
     */
    private static void startSustainApproach(String sampleDir, String sampleName, double r, double tau, double beta,
            double learning_rate, int trainingRecency, int candidateNumber, int sampleSize, double cfWeight) {
        getTrainTestSize(sampleName);

        SustainCalculator calculator = new SustainCalculator(sampleName, TRAIN_SIZE);
        BookmarkReader predictions = calculator.predictResources(r, tau, beta, learning_rate, trainingRecency,
                candidateNumber, sampleSize, cfWeight);

        writeMetricsForResources(sampleDir, sampleName, "sustain", 1, 20, null, predictions, TRAIN_SIZE);
    }

    /**
     * Computes and writes resource-recommendation metrics for every cut-off from
     * 1 to {@code k}.
     *
     * For each cut-off, {@code MetricsCalculator.calculateMetrics} is invoked
     * {@code sampleCount} times and the averages are then written with
     * {@code MetricsCalculator.writeAverageMetrics}.
     *
     * @param sampleDir    directory prefix of the metrics output file
     * @param sampleName   base name of the sample
     * @param prefix       algorithm identifier used in the prediction and metrics file names
     * @param sampleCount  number of samples to average over
     * @param k            largest cut-off to evaluate
     * @param posfix       optional topic posfix; {@code null} or "0" means no posfix
     * @param reader       bookmark reader handed to the metrics calculator
     * @param trainSize    forwarded unchanged to the metrics calculator; may be {@code null}
     */
    private static void writeMetricsForResources(String sampleDir, String sampleName, String prefix, int sampleCount,
            int k, String posfix, BookmarkReader reader, Integer trainSize) {
        // Compare by value: "==" on Strings only matches the identical object, so a
        // "0" built at runtime (e.g. parsed from arguments) was not recognised.
        String topicString = (posfix == null || "0".equals(posfix)) ? "_" : "_" + posfix + "_";
        for (int i = 1; i <= k; i++) {
            for (int j = 1; j <= sampleCount; j++) {
                // NOTE(review): this path ends in topicString + "_metrics" while the average
                // below uses topicString + "metrics", so the two names differ by one
                // underscore - confirm whether that is intended.
                MetricsCalculator.calculateMetrics(sampleName + topicString + prefix, i,
                        sampleDir + "/" + prefix + topicString + "_metrics", false, reader, MIN_USER_BOOKMARKS,
                        MAX_USER_BOOKMARKS, MIN_RESOURCE_BOOKMARKS, MAX_RESOURCE_BOOKMARKS, null, false, trainSize);
            }
            MetricsCalculator.writeAverageMetrics(sampleDir + "/" + prefix + topicString + "metrics", i,
                    (double) sampleCount, false, i == k, null);
        }
    }
}