package processing.hashtag.solr; import java.io.File; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import com.google.common.primitives.Ints; import common.Bookmark; import common.MapUtil; import file.BookmarkReader; import file.PredictionFileWriter; import file.ResultSerializer; import processing.CFTagRecommender; public class CFSolrHashtagCalculator { public static void predictSample(String dir, String filename, int trainSize, double betaCB, String solrUrl, String solrCore) { BookmarkReader reader = new BookmarkReader(0, false); reader.readFile(filename); Map<Integer, Map<Integer, Double>> contentBasedValues = null; if (solrUrl != null && solrCore != null) { if (new File("./data/results/" + dir + "/" + solrCore + "_cbpredictions.ser").exists()) { System.out.println("Found cb file ..."); contentBasedValues = SolrHashtagCalculator.deSerializeHashtagPrediction("./data/results/" + dir + "/" + solrCore + "_cbpredictions.ser"); } else { System.out.println("Did not find cb file ..."); contentBasedValues = SolrHashtagCalculator.getNormalizedHashtagPredictions(dir, solrCore, solrUrl, reader, null); } System.out.println("Number of content-based recommendations: " + contentBasedValues.size()); } reader.setTestLines(reader.getBookmarks().subList(trainSize, reader.getBookmarks().size())); List<Bookmark> testLines = reader.getTestLines(); CFTagRecommender calculator = new CFTagRecommender(reader, trainSize, true, false, 5); List<Map<Integer, Double>> resultValues = new ArrayList<Map<Integer, Double>>(); Map<Integer, Map<Integer, Double>> hashtagMaps = new LinkedHashMap<Integer, Map<Integer, Double>>(); for (Bookmark b : testLines) { // get cf Tags if (contentBasedValues.containsKey(b.getUserID())) { Map<Integer, Double> cfTags = calculator.getRankedTagList(b.getUserID(), b.getResourceID(), false); MapUtil.normalizeMap(cfTags); if (cfTags != null && cfTags.entrySet() != null) { for (Map.Entry<Integer, Double> cfEntry : cfTags.entrySet()) { if (cfEntry != null && cfEntry.getKey() != null) { cfEntry.setValue(betaCB * cfEntry.getValue()); } } } Map<Integer, Double> contentMap = contentBasedValues.get(b.getUserID()); if (contentMap != null && contentMap.entrySet() != null) { for (Map.Entry<Integer, Double> contentEntry : contentMap.entrySet()) { if (contentEntry != null && contentEntry.getKey() != null) { Double cfVal = cfTags.get(contentEntry.getKey()); double contentVal = (1.0 - betaCB) * contentEntry.getValue(); cfTags.put(contentEntry.getKey(), cfVal == null ? contentVal : cfVal.doubleValue() + contentVal); } } } Map<Integer, Double> sortedMap = MapUtil.sortByValue(cfTags); resultValues.add(sortedMap); hashtagMaps.put(b.getUserID(), sortedMap); } else { // ignore all entries where no content-based recommendations where found resultValues.add(null); } } List<int[]> predictionValues = new ArrayList<int[]>(); if (resultValues != null) { for (int i = 0; i < resultValues.size(); i++) { Map<Integer, Double> resultMap = resultValues.get(i); if (resultMap != null && resultMap.keySet() != null) { predictionValues.add(Ints.toArray(resultMap.keySet())); } else { predictionValues.add(null); } } } ResultSerializer.serializePredictions(hashtagMaps, "./data/results/" + dir + "/" + solrCore + "_cbcfpredictions.ser"); PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues); writer.writeFile(filename + "_cf_cb_" + betaCB); } }