package happy.research.cf; import happy.coding.io.FileIO; import happy.coding.math.Sims; import happy.coding.math.Stats; import happy.coding.system.Debug; import happy.research.utils.SimUtils; import happy.research.utils.TrustUtils; import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; /** * All different Merge cases: * <ul> * <li>A: Decide which trusted neighbors to be involved. * <ul> * <li>A1: all trusted neighbors</li> * <li>A2: trusted neighbors whose similarity is greater than threshold</li> * <li>A3: trusted neighbors whose ratings are greater than threshold</li> * <li>A4: trusted neighbors whose reputation is greater than threshold</li> * <li>A5: top-k trusted neighbors</li> * </ul> * </li> * <li>B: Decide which ratings of trusted neighbors as candidates to be merged. * <ul> * <li>B1: keep active user's own ratings</li> * <li>B2: all ratings are equally</li> * </ul> * </li> * <li>C: Decide what weight values of trusted neighbors to be used for merging * ratings. * <ul> * <li>C1: trust weight</li> * <li>C2: similarity</li> * <li>C3: harmonic weight of trust and similarity</li> * </ul> * </li> * <li>D: Decide which (trusted or similar) neighbors to be used for predicting * item's rating. * <ul> * <li>D1: TNs + NNs</li> * <li>D2: NNs (including some trusted neighbors as similar users)</li> * <li>D3: NNs (excluding all trusted neighbors)</li> * </ul> * </li> * <li>E: Confidence calculation method * <ul> * <li>E1: ratings.std < threshold</li> * <li>E2: ratings.num > threshold</li> * <li>E3: ratings.certainty > threshold</li> * </ul> * </li> * <li>F: Other tricks we can try * <ul> * <li>F1: only trusted neighbors with many ratings are used.</li> * <li>F2: different confidence for cold or heavy users.</li> * </ul> * </li> * </ul> * * @author guoguibing */ public class Merge_t extends Thread_t { protected boolean aAllTNs = false; protected boolean bKeepOwnRatings = false; /* weight to be used for merging */ protected boolean c1Trust = false; protected boolean c2Sim = false; protected boolean c3Harmonic = false; protected boolean c4Average = false; protected boolean d1TN_NN = false; protected boolean d2NN_TN = false; protected boolean d3NN = false; protected boolean e1Std = false; protected boolean e2Num = false; protected boolean e3Certainty = false; public static Thread_t newMergeCase(int id, String pc) { return new Merge_t(id, pc); } /** * set the test case according to the input pattern * * @param pattern * case pattern, such as A1B1C1D1 */ protected void setCase(String pattern) { int a = Integer.parseInt(pattern.substring(1, 2)); int b = Integer.parseInt(pattern.substring(3, 4)); int c = Integer.parseInt(pattern.substring(5, 6)); int d = Integer.parseInt(pattern.substring(7, 8)); int e = 0; if (pattern.length() > 8) e = Integer.parseInt(pattern.substring(9, 10)); if (a == 1) aAllTNs = true; else aAllTNs = false; if (b == 1) bKeepOwnRatings = true; else bKeepOwnRatings = false; switch (c) { case 1: c1Trust = true; c2Sim = false; c3Harmonic = false; c4Average = false; break; case 2: c1Trust = false; c2Sim = true; c3Harmonic = false; c4Average = false; break; case 3: c1Trust = false; c2Sim = false; c3Harmonic = true; c4Average = false; break; case 4: c1Trust = false; c2Sim = false; c3Harmonic = false; c4Average = true; break; } switch (d) { case 1: d1TN_NN = true; d2NN_TN = false; d3NN = false; break; case 2: d1TN_NN = false; d2NN_TN = true; d3NN = false; break; case 3: d1TN_NN = false; d2NN_TN = false; d3NN = true; break; } // logger.debug("a = {}, b = {}, c = {}, d = {}, e = {}", new Object[] { // a, b, c, d, e }); switch (e) { case 1: e1Std = true; e2Num = false; e3Certainty = false; break; case 2: e1Std = false; e2Num = true; e3Certainty = false; break; case 3: e1Std = false; e2Num = false; e3Certainty = true; break; } } public Merge_t(int id, String pattern) { super(id); setCase(pattern); } protected Map<String, Double>[] buildModel(Rating testRating) { String user = testRating.getUserId(); Map<String, Double> tnScores = null; try { tnScores = FileIO.readAsIDMap(trustDirPath + user + ".txt"); } catch (FileNotFoundException e) { // logger.debug("No trusted neighbours for user {}", user); } catch (Exception e) { e.printStackTrace(); } if (tnScores == null) tnScores = new HashMap<>(); tnScores.put(user, 1.0); Map<String, Double> proxyRatings = new HashMap<>(); Map<String, Double> itemCons = mergeRatings(testRating, tnScores, proxyRatings); Map<String, Double>[] nnData = findoutNNs(testRating, proxyRatings, itemCons, tnScores); Map<String, Double>[] ttData = null; //d1TN_NN = false; if (d1TN_NN) ttData = useTrustRatings(testRating, nnData != null ? nnData[0] : null); return combineData(nnData, ttData); } protected Map<String, Double> mergeRatings(Rating testRating, Map<String, Double> tnScores, Map<String, Double> proxyRatings) { Map<String, List<Rating>> itemRatingsMap = new HashMap<>(); Map<String, Double> itemRatings = new HashMap<>(); Map<String, Double> itemMeans = new HashMap<>(); Map<String, Double> itemConfidences = new HashMap<>(); Map<String, Double> tnSims = new HashMap<>(); /* only use for storing active user's own ratings */ Map<String, Double> activeRatings = new HashMap<>(); Map<String, Double> itemCons = new HashMap<>(); int knn = params.kNN; String user = testRating.getUserId(); if (bKeepOwnRatings) { Map<String, Rating> rs = userRatingsMap.get(user); if (rs != null) { for (Rating r : rs.values()) { if (r == testRating) continue; activeRatings.put(r.getItemId(), r.getRating()); itemCons.put(r.getItemId(), 1.0); } } knn = knn - activeRatings.size(); if (knn <= 0) { proxyRatings.putAll(activeRatings); return itemCons; } } Map<String, Rating> asRatings = userRatingsMap.get(user); for (String tn : tnScores.keySet()) { if (bKeepOwnRatings && tn.equals(user)) continue; Map<String, Rating> tnsRatings = userRatingsMap.get(tn); if (tnsRatings == null) continue; /* A: determine which trusted neighbours to be used */ double similarity = 1.0; if (aAllTNs) { // A1: use all trusted neighbours, no need to compute // similarity similarity = 1.0; if (tn != user) { List<Double> as = new ArrayList<>(); List<Double> bs = new ArrayList<>(); for (String item : asRatings.keySet()) { if (item.equals(testRating.getItemId())) continue; if (tnsRatings.containsKey(item)) { as.add(asRatings.get(item).getRating()); bs.add(tnsRatings.get(item).getRating()); } } double result = Sims.pcc(as, bs); if (!Double.isNaN(result)) similarity = result; } } else { // A2: use only trusted neighbours with similarity // constraints if (tn.equals(user)) { // always similar to himself similarity = 1.0; } else { // similarity with active user if (asRatings.size() < 3) similarity = 1.0; else { similarity = 0.0; List<Double> as = new ArrayList<>(); List<Double> bs = new ArrayList<>(); for (String item : asRatings.keySet()) { if (item.equals(testRating.getItemId())) continue; if (tnsRatings.containsKey(item)) { as.add(asRatings.get(item).getRating()); bs.add(tnsRatings.get(item).getRating()); } } double result = Sims.pcc(as, bs); if (!Double.isNaN(result)) similarity = result; } } } /* find out rated items of trusted neighbours */ if (aAllTNs || similarity > params.SIMILARITY_THRESHOLD) { tnSims.put(tn, similarity); for (Rating r : tnsRatings.values()) { if (r == testRating) continue; String itemId = r.getItemId(); if (bKeepOwnRatings && activeRatings.containsKey(itemId)) continue; List<Rating> trs = null; if (itemRatingsMap.containsKey(itemId)) trs = itemRatingsMap.get(itemId); else trs = new ArrayList<>(); trs.add(r); itemRatingsMap.put(itemId, trs); } } } /* merge ratings of items together */ if (bKeepOwnRatings) itemRatings.putAll(activeRatings); for (Entry<String, List<Rating>> en : itemRatingsMap.entrySet()) { String item = en.getKey(); List<Rating> ratings = en.getValue(); double sum = 0.0; double weights = 0.0; int positive = 0, negative = 0; for (Rating r : ratings) { String tn = r.getUserId(); double similarity = 0.0; if (tnSims.containsKey(tn)) similarity = tnSims.get(tn); double trust = tnScores.get(tn); double weight = 0; if (c3Harmonic) weight = Stats.hMean(trust, similarity); else if (c1Trust) weight = trust; else if (c2Sim) weight = similarity; else if (c4Average) weight = (trust + similarity) / 2.0; sum += weight * r.getRating(); weights += Math.abs(weight); if (r.getRating() > Dataset.median) positive++; else negative++; } double mean = sum / weights; if (Double.isNaN(mean) || mean <= 0.0) continue; double certainty = 0.0; try { certainty = TrustUtils.confidence(positive, negative); } catch (Exception e) { e.printStackTrace(); } if (certainty > params.CONFIDENCE_THRESHOLD) { itemMeans.put(item, mean); itemConfidences.put(item, certainty); } } /* ranking confidences */ if (itemConfidences.size() > 0) { List<Double> confidences = new ArrayList<>(itemConfidences.values()); Collections.sort(confidences); int count = 0; for (int i = confidences.size() - 1; i >= 0; i--) { double confidence = confidences.get(i); for (Entry<String, Double> en : itemConfidences.entrySet()) { String itemId = en.getKey(); double c = en.getValue(); if (c == confidence && !itemRatings.containsKey(itemId)) { count++; itemRatings.put(itemId, itemMeans.get(itemId)); itemCons.put(itemId, confidence); break; } } if (count >= knn) break; } } proxyRatings.putAll(itemRatings); return itemCons; } @SuppressWarnings("unchecked") protected Map<String, Double>[] findoutNNs(Rating testRating, Map<String, Double> proxyRatings, Map<String, Double> itemCons, Map<String, Double> tnScores) { if (proxyRatings == null || proxyRatings.size() < 1) return null; String user = testRating.getUserId(); String item = testRating.getItemId(); double rating = testRating.getRating(); Map<String, Double> nnScores = new HashMap<>(); Map<String, Double> nnRatings = new HashMap<>(); for (Entry<String, Map<String, Rating>> en : userRatingsMap.entrySet()) { String userB = en.getKey(); if (userB.equals(user)) continue; // if (tnScores != null && tnScores.containsKey(userB)) continue; Map<String, Rating> bsRatings = en.getValue(); if (bsRatings == null) continue; double bsRating = 0.0; if (rating > 0) { if (bsRatings.containsKey(item)) bsRating = bsRatings.get(item).getRating(); if (bsRating <= 0.0) continue; } List<Double> as = new ArrayList<>(); List<Double> bs = new ArrayList<>(); List<Double> cs = new ArrayList<>(); for (String itemId : bsRatings.keySet()) { if (proxyRatings.containsKey(itemId)) { as.add(proxyRatings.get(itemId)); bs.add(bsRatings.get(itemId).getRating()); cs.add(itemCons.get(itemId)); } } double weight = 1.0; if (Debug.ON) { double gamma = 10.0; if (as.size() > gamma) weight = 1.0; else weight = as.size() / gamma; } double similarity = weight * SimUtils.pearsonSim(as, bs, cs); if (Double.isNaN(similarity)) continue; if (similarity > 0.0) { if (tnScores.containsKey(userB)) similarity = Stats.hMean(similarity, 1.0); nnScores.put(userB, similarity); nnRatings.put(userB, bsRating); } } return new Map[] { nnScores, nnRatings }; } @SuppressWarnings("unchecked") protected Map<String, Double>[] useTrustRatings(Rating testRating, Map<String, Double> nnScores) { String user = testRating.getUserId(); String item = testRating.getItemId(); Map<String, Double> scores = null; Map<String, Double> trustScores = new HashMap<>(); Map<String, Double> trustRatings = new HashMap<>(); try { scores = FileIO.readAsIDMap(trustDirPath + user + ".txt"); } catch (FileNotFoundException e) { } catch (Exception e) { e.printStackTrace(); } if (scores == null || scores.size() < 1) return null; for (String tn : scores.keySet()) { if (tn.equals(user)) continue; if (nnScores != null && nnScores.containsKey(tn)) continue; Map<String, Rating> tnRatings = userRatingsMap.get(tn); if (tnRatings == null) continue; double tnRating = 0.0; if (tnRatings.containsKey(item)) tnRating = tnRatings.get(item).getRating(); if (tnRating > 0) { int size = userTrustorsMap.get(tn).size(); double sim = 2.0 / (1 + Math.exp(-size)) - 1; if (sim > 0.9) { trustScores.put(tn, scores.get(tn) * sim); trustRatings.put(tn, tnRating); } } } return new Map[] { trustScores, trustRatings }; } }