package happy.research.cf;
import happy.coding.io.FileIO;
import happy.coding.io.Logs;
import happy.coding.math.Randoms;
import happy.coding.math.Sims;
import happy.coding.math.Stats;
import happy.coding.system.Debug;
import happy.research.cf.ConfigParams.DatasetMode;
import happy.research.utils.SimUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
public abstract class DefaultCF extends AbstractCF {
protected static Map<String, Double> itemMeanMap;
//protected static Map<Double, Integer> scaleNum;
protected static Map<Double, Integer> distanceNum;
private Map<Rating, Map<String, Double>> ratingItemSd = null;
private Map<Rating, Map<String, Double>> ratingItemMu = null;
private Map<Rating, Map<String, Double>> ratingItemConf = null;
private Map<Rating, Map<String, Map<Double, Double>>> ratingItemHist = null;
private Map<Rating, Map<String, Integer>> ratingItemPos = null;
private Map<Rating, Map<String, Integer>> ratingItemNeg = null;
/*
* trustDirPath: path of the generated trust directory similarityDirPath:
* path of the generated similarity directory missingRatingsDirPath: path of
* the predicted missing ratings of the trusted neighbours' directory
*/
protected static String trustDirPath = null;
/**
* To initialize some variables if data sets need to be reloaded again.
*/
@Override
protected void init() {
    numRunMethod++;

    // Drop every cached structure so the next run reloads the data sets.
    // Rating-side caches:
    userRatingsMap = null;
    itemRatingsMap = null;
    testUserRatingsMap = null;
    testItemRatingsMap = null;
    testRatings = null;
    itemMeanMap = null;

    // Trust-side caches:
    userTNsMap = null;
    userDNsMap = null;
    userTrustorsMap = null;
}
@Override
protected void loadDataset() throws Exception {
    // Trust links first, then ratings; each loader is a no-op when its
    // cache is already populated (cleared by init()).
    load_trusts();
    load_ratings();
}
/**
 * Builds {@code testRatings} according to {@code params.DATASET_MODE}:
 * item-oriented modes select whole items, user-oriented modes select
 * whole users.
 */
protected void prepTestRatings() {
    testRatings = null;
    preProcessing();

    Logs.debug("Preparing test-rating data ...");
    testRatings = new ArrayList<>();

    DatasetMode mode = params.DATASET_MODE;
    if (mode == DatasetMode.nicheItems || mode == DatasetMode.contrItems) {
        // Item-oriented selection: keep all ratings of qualifying items.
        for (Entry<String, Map<String, Rating>> entry : itemRatingsMap.entrySet()) {
            Map<String, Rating> itemRatings = entry.getValue();
            boolean selected = (mode == DatasetMode.nicheItems)
                    ? itemRatings.size() < 5 // niche: rated by few users
                    : RatingUtils.std(itemRatings.values()) > 1.5; // controversial
            if (selected)
                testRatings.addAll(itemRatings.values());
        }
    } else {
        // User-oriented selection: keep all ratings of qualifying users.
        for (Entry<String, Map<String, Rating>> entry : userRatingsMap.entrySet()) {
            Map<String, Rating> userRatings = entry.getValue();
            int count = userRatings.size();
            boolean selected = false;
            if (mode == DatasetMode.all)
                selected = count > 0;
            else if (mode == DatasetMode.coldUsers)
                selected = count < 5;
            else if (mode == DatasetMode.heavyUsers)
                selected = count > 10;
            else if (mode == DatasetMode.opinUsers)
                selected = count > 4 && RatingUtils.std(userRatings.values()) > 1.5;
            else if (mode == DatasetMode.blackSheep)
                selected = count > 4 && RatingUtils.meanDistance(userRatings, itemMeanMap) > 1;
            if (selected)
                testRatings.addAll(userRatings.values());
        }
    }
    Logs.debug("Done!");
}
/**
 * Hook for any pre-processing needed before the test ratings are
 * generated; the default implementation does nothing. Subclasses may
 * override it.
 */
protected void preProcessing() {
    // intentionally empty
}
/**
 * Loads the training ratings (unless already cached), filling
 * {@code userRatingsMap}, {@code itemRatingsMap} and — for the modes
 * that need per-item averages — {@code itemMeanMap}.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
protected void load_ratings() throws Exception {
    if (userRatingsMap != null)
        return; // already loaded

    // Pick the rating file according to the validation protocol.
    String ratingSet = null;
    switch (params.VALIDATE_METHOD) {
    case leave_one_out:
        ratingSet = Dataset.DIRECTORY + Dataset.RATING_SET;
        break;
    case cross_validation:
        ratingSet = Dataset.DIRECTORY + params.TRAIN_SET;
        break;
    }
    Logs.debug("Loading rating data {}", ratingSet);

    Map[] data = Dataset.loadTrainSet(ratingSet);
    userRatingsMap = data[0];
    itemRatingsMap = data[1];
    //scaleNum = data[2];

    // These modes rely on per-item mean ratings later on.
    DatasetMode mode = params.DATASET_MODE;
    if (mode == DatasetMode.all || mode == DatasetMode.blackSheep
            || mode == DatasetMode.nicheItems || mode == DatasetMode.contrItems)
        itemMeanMap = RatingUtils.itemMeans(itemRatingsMap);
}
/**
 * Loads the trust (and, for the extended Epinions data set, distrust)
 * relations unless they are already cached.
 */
protected void load_trusts() throws Exception {
    if (userTNsMap != null)
        return; // already loaded

    String trustSet;
    if (params.auto_trust_sets) {
        trustSet = current_trust_dir + Dataset.TRUST_SET;
    } else {
        trustSet = Dataset.DIRECTORY + Dataset.TRUST_SET;
        if (!FileIO.exist(trustSet))
            return; // data set ships without trust information
    }

    switch (Dataset.dataset) {
    case EXTENDED_EPINIONS:
        if (!params.auto_trust_sets) {
            String distrustSet = Dataset.DIRECTORY + Dataset.DISTRUST_SET;
            Logs.debug("Loading distrust data {}", distrustSet);
            userDNsMap = DatasetUtils.loadTrustSet(distrustSet);
        }
        // deliberate fall-through: trust data is loaded as well
    case EPINIONS:
    case FILMTRUST:
    case FLIXSTER:
        Logs.debug("Loading trust data {}", trustSet);
        userTNsMap = DatasetUtils.loadTrustSet(trustSet);
        userTrustorsMap = DatasetUtils.loadTrusteeSet(trustSet);
        break;
    default:
        break;
    }
}
/**
 * Performs repeated random walks over the trust network (TrustWalker-style)
 * to collect ratings on the target item from trusted neighbours.
 *
 * <p>For each walk, at every step one trusted neighbour is chosen uniformly.
 * If that neighbour rated the target item the walk stops there; otherwise,
 * with probability {@code phi} (derived from item-item similarities) a
 * rating on a similar item is used and the walk stops, else it continues up
 * to {@code maxLength} steps. Walks are repeated until the variance of the
 * collected ratings converges or {@code epochs} is reached.
 *
 * @param testRating the rating to predict (source user + target item)
 * @param weights    out-parameter: walk probability per contributing user
 * @param ratings    out-parameter: the rating contributed per user
 *
 * Fixes over the previous version:
 * 1) the null check guarded {@code testRating} (already dereferenced above)
 *    instead of the possibly-null per-item rating map;
 * 2) the convergence test indexed a {@code Map<String, Rating>} with an
 *    autoboxed int key, which always returned null (NPE); the values are
 *    iterated instead;
 * 3) the local shadowing the {@code testRatings} field is renamed, and the
 *    inverted {@code fix_phi} flag is renamed to match its actual behavior
 *    (phi IS computed from similarities; behavior unchanged).
 */
protected void randomTrustWalk(Rating testRating, Map<String, Double> weights, Map<String, Rating> ratings) {
    String user = testRating.getUserId();
    String itemId = testRating.getItemId();

    final double epsilon = 0.0001; // convergence threshold on rating variance
    final int epochs = 10000;      // maximum number of walks
    final int maxLength = 6;       // maximum steps per walk

    // When true, the similarity computation is skipped and a fixed phi is
    // used instead (disabled: similarities are computed).
    final boolean useFixedPhi = false;
    final double fixedPhi = 0.0;

    // All known ratings on the target item; at least two raters are needed
    // to compute item-item similarities along the walk.
    Map<String, Rating> itemRaters = itemRatingsMap.get(itemId);
    if (itemRaters == null || itemRaters.size() < 2)
        return;

    double previousVariance = 0.0;
    for (int epoch = 0; epoch < epochs; epoch++) {
        String currentUser = user;
        double probability = 1.0;

        /* one random walk */
        for (int currentLength = 1; currentLength <= maxLength; currentLength++) {
            Map<String, Double> tns = userTNsMap.get(currentUser);
            if (tns == null || tns.size() < 1)
                break; // dead end: no trusted neighbours

            /* choose the next user uniformly among the trusted neighbours */
            int size = tns.size();
            double step = 1.0 / size;
            int tnIndex = Randoms.uniform(size);
            int j = 0;
            for (String tn : tns.keySet()) {
                if (j == tnIndex) {
                    currentUser = tn;
                    break;
                }
                j++;
            }
            if (currentUser.equals(user))
                break; // walked back to the source user: abort this walk
            probability *= step;

            /* if the current user rated the target item, stop here */
            Map<String, Rating> rs = userRatingsMap.get(currentUser);
            if (rs == null || rs.size() < 1)
                break;
            if (rs.containsKey(itemId)) {
                weights.put(currentUser, probability);
                ratings.put(currentUser, rs.get(itemId));
                break;
            }

            /* compute phi, the probability of stopping at the current user
             * and using one of his ratings on an item similar to the target */
            List<Double> sims = new ArrayList<>();
            List<Rating> rats = new ArrayList<>();
            double phi = 0.0;
            double sum = 0.0;
            if (useFixedPhi) {
                phi = fixedPhi;
            } else {
                for (Rating r : rs.values()) {
                    String itemJ = r.getItemId();
                    Map<String, Rating> jsRS = itemRatingsMap.get(itemJ);
                    // PCC between the target item and itemJ over common raters
                    List<Double> is = new ArrayList<>();
                    List<Double> js = new ArrayList<>();
                    for (String userI : itemRaters.keySet()) {
                        if (jsRS.containsKey(userI)) {
                            is.add(itemRaters.get(userI).getRating());
                            js.add(jsRS.get(userI).getRating());
                        }
                    }
                    if (is.size() < 2)
                        continue;
                    double similarity = Sims.pcc(is, js);
                    if (Double.isNaN(similarity))
                        continue;
                    if (similarity > 0) {
                        sims.add(similarity);
                        rats.add(r);
                        sum += similarity;
                        if (phi < similarity)
                            phi = similarity; // phi = max positive similarity
                    }
                }
                /* if no similar items => phi=0 => random>phi => go on;
                 * dampen phi with the walk length (longer walks stop easier) */
                phi *= 1.0 / (1 + Math.exp(-currentLength));
            }

            /* stay (use a rating on a similar item) or go on */
            double random = Randoms.uniform();
            if (random < phi) {
                probability *= phi;
                // pick one similar item, weighted by its similarity share
                int index = Randoms.uniform(0, sims.size());
                double prob = sims.get(index) / sum;
                probability *= prob;
                weights.put(currentUser, probability);
                ratings.put(currentUser, rats.get(index));
            } else {
                probability *= (1 - phi);
            }
        }

        /* test whether the variance of the collected ratings converged */
        double[] data = new double[ratings.size()];
        int m = 0;
        for (Rating r : ratings.values())
            data[m++] = r.getRating();
        double currentVariance = Stats.var(data);
        if (Math.abs(currentVariance - previousVariance) < epsilon)
            break;
        previousVariance = currentVariance;
    }
}
/**
 * Computes the similarity between two users over their co-rated items,
 * according to {@code params.SIMILARITY_METHOD}.
 *
 * Several methods (BS, PIP, SM) need per-item statistics computed over the
 * whole data set with the current test rating excluded (leave-one-out).
 * Those statistics are held in single-entry caches keyed by the test rating
 * ({@code ratingItem*} maps, cleared before each put to save memory), so
 * they are computed once per test rating and reused across user pairs.
 *
 * @param as         the active user's ratings on the co-rated items
 * @param bs         the other user's ratings, aligned with {@code as}
 * @param items      ids of the co-rated items, aligned with {@code as}/{@code bs}
 * @param testRating the rating under prediction; excluded from the per-item
 *                   statistics computed here
 * @return the similarity value; {@code Double.NaN} for PCC/caPCC with fewer
 *         than two co-rated items, {@code 0} for unhandled methods
 */
protected double similarity(List<Double> as, List<Double> bs, List<String> items, Rating testRating) {
    double similarity = 0;
    switch (params.SIMILARITY_METHOD) {
    case COS:
        similarity = Sims.cos(as, bs);
        break;
    case iufCOS:
        /*
         * This implementation refer to the paper: Alan et al., Analyzing
         * Weighting Schemes in Collaborative Filtering: Cold Start, Post
         * Cold Start and Power Users, SAC 2012.
         */
        double num_all_users = userRatingsMap.size();
        double inner = 0; // inner product
        double al = 0; // a's length
        double bl = 0; // b's length
        for (int i = 0, im = as.size(); i < im; i++) {
            double rai = as.get(i);
            double rbi = bs.get(i);
            String item = items.get(i);
            // inverse user frequency: rare items weigh more
            double iuf = Math.log(num_all_users / itemRatingsMap.get(item).size());
            inner += iuf * rai * rbi;
            al += iuf * rai * rai;
            bl += iuf * rbi * rbi;
        }
        if (al > 0 && bl > 0)
            similarity = inner / (Math.sqrt(al) * Math.sqrt(bl));
        break;
    case BS:
        try {
            /* This is the similarity method for IJCAI paper */
            List<Double> priors = learnScalePriors(testRating);
            // lazily create the per-test-rating caches
            if (ratingItemSd == null) {
                ratingItemSd = new HashMap<>();
                ratingItemMu = new HashMap<>();
                ratingItemConf = new HashMap<>();
                ratingItemHist = new HashMap<>();
            }
            Map<String, Double> itemSd = null;
            Map<String, Double> itemMu = null;
            Map<String, Double> itemConf = null;
            Map<String, Map<Double, Double>> itemHist = null;
            if (ratingItemSd.containsKey(testRating)) {
                // cache hit: statistics already computed for this test rating
                itemSd = ratingItemSd.get(testRating);
                itemMu = ratingItemMu.get(testRating);
                itemConf = ratingItemConf.get(testRating);
                itemHist = ratingItemHist.get(testRating);
            } else {
                // cache miss: compute mean/sd/confidence/histogram per item,
                // excluding the test rating itself
                itemSd = new HashMap<>();
                itemMu = new HashMap<>();
                itemConf = new HashMap<>();
                itemHist = new HashMap<>();
                for (String item : itemRatingsMap.keySet()) {
                    Map<String, Rating> userRatings = itemRatingsMap.get(item);
                    List<Double> rs = new ArrayList<>();
                    int pos = 0; // # ratings above the median (identity check excludes the test rating)
                    for (Rating r : userRatings.values()) {
                        if (r == testRating)
                            continue;
                        double rate = r.getRating();
                        rs.add(rate);
                        if (rate > Dataset.median)
                            pos++;
                    }
                    double mean = Stats.mean(rs);
                    double deviation = Stats.sd(rs, mean);
                    double conf = 0;
                    Map<Double, Double> hist = new HashMap<>();
                    // NOTE(review): Debug.ON/OFF act as compile-time switches
                    // between alternative confidence definitions; only the
                    // Debug.ON branch below is active.
                    if (Debug.OFF) {
                        //conf = 1.0 / (1.0 + Math.exp(-rs.size() / 2.0));
                        double thrd = 10.0;
                        if (rs.size() > thrd)
                            conf = 1.0;
                        else
                            conf = rs.size() / thrd;
                    } else if (Debug.ON) {
                        // active branch: confidence = fraction of positive ratings
                        conf = pos / (rs.size() + 0.0);
                    } else if (Debug.OFF) {
                        // new approach: pair-wise rating distance distribution
                        Map<Double, Integer> dists = new HashMap<>();
                        List<Double> ds = new ArrayList<>();
                        int total = 0;
                        for (int i = 0; i < rs.size(); i++) {
                            double r1 = rs.get(i);
                            for (int j = i + 1; j < rs.size(); j++) {
                                double r2 = rs.get(j);
                                double dist = Math.abs(r1 - r2);
                                int cnt = 0;
                                if (dists.containsKey(dist))
                                    cnt = dists.get(dist);
                                dists.put(dist, cnt + 1);
                                ds.add(dist);
                                total++;
                            }
                        }
                        // mean/sd over pairwise distances instead of raw ratings
                        mean = Stats.mean(ds);
                        deviation = Stats.sd(ds, mean);
                        conf = 1.0 / (1.0 + Math.exp(-total / 2.0));
                        //conf = Maths.log(1 + total, 100);
                        if (conf > 1.0)
                            conf = 1.0;
                        //if (total > 30) conf = 1.0;
                        //else conf = total / 30.0;
                        //Logs.debug("total = {}", total);
                        for (Entry<Double, Integer> en : dists.entrySet()) {
                            double ratio = 0;
                            ratio = en.getValue() / (rs.size() + 0.0);
                            hist.put(en.getKey(), ratio);
                        }
                    }
                    itemSd.put(item, deviation);
                    itemMu.put(item, mean);
                    itemConf.put(item, conf);
                    itemHist.put(item, hist);
                }
                // single-entry cache: drop the previous test rating's data
                ratingItemSd.clear();
                ratingItemSd.put(testRating, itemSd);
                ratingItemMu.clear();
                ratingItemMu.put(testRating, itemMu);
                ratingItemConf.clear();
                ratingItemConf.put(testRating, itemConf);
                ratingItemHist.clear();
                ratingItemHist.put(testRating, itemHist);
            }
            // project the cached per-item statistics onto the co-rated items
            List<Double> sd = new ArrayList<>();
            List<Double> mu = new ArrayList<>();
            List<Double> cf = new ArrayList<>();
            Map<Integer, Map<Double, Double>> histos = new HashMap<>();
            for (String item : items) {
                int index = sd.size();
                sd.add(itemSd.get(item));
                mu.add(itemMu.get(item));
                cf.add(itemConf.get(item));
                histos.put(index, itemHist.get(item));
            }
            similarity = SimUtils.bsSim(as, bs, priors, sd, mu, histos, cf);
        } catch (Exception e) {
            e.printStackTrace();
        }
        break;
    case PCC:
    case caPCC:
        if (as.size() < 2 || bs.size() < 2)
            similarity = Double.NaN; // PCC undefined for < 2 co-rated items
        else
            similarity = Sims.pcc(as, bs);
        break;
    case MSD:
        similarity = Sims.msd(as, bs);
        break;
    case CPC:
        // constrained PCC, centered on the rating-scale median
        similarity = Sims.cpc(as, bs, Dataset.median);
        break;
    case PIP:
        // PIP needs leave-one-out item means, cached per test rating
        if (ratingItemMu == null)
            ratingItemMu = new HashMap<>();
        Map<String, Double> itemMeans = null;
        if (ratingItemMu.containsKey(testRating))
            itemMeans = ratingItemMu.get(testRating);
        else {
            itemMeans = new HashMap<>();
            for (String item : itemRatingsMap.keySet()) {
                double sum = 0.0;
                Map<String, Rating> userRatings = itemRatingsMap.get(item);
                int count = 0;
                for (Rating r : userRatings.values()) {
                    if (r == testRating)
                        continue;
                    else {
                        count++;
                        sum += r.getRating();
                    }
                }
                if (count > 0)
                    itemMeans.put(item, sum / count);
            }
            ratingItemMu.clear(); // to save memory
            ratingItemMu.put(testRating, itemMeans);
        }
        /* prep item-mean for the co-rated items */
        List<Double> means = new ArrayList<>();
        for (String item : items)
            means.add(itemMeans.get(item));
        similarity = SimUtils.PIPSim(as, bs, means);
        // System.out.println("PIP sim = " + similarity);
        break;
    case SM:
        // SM needs leave-one-out positive/negative counts per item, cached
        if (ratingItemPos == null) {
            ratingItemPos = new HashMap<>();
            ratingItemNeg = new HashMap<>();
        }
        Map<String, Integer> itemPos = null;
        Map<String, Integer> itemNeg = null;
        if (ratingItemPos.containsKey(testRating)) {
            itemPos = ratingItemPos.get(testRating);
            itemNeg = ratingItemNeg.get(testRating);
        } else {
            itemPos = new HashMap<>();
            itemNeg = new HashMap<>();
            for (String item : itemRatingsMap.keySet()) {
                Map<String, Rating> userRatings = itemRatingsMap.get(item);
                int pos = 0, neg = 0;
                for (Rating r : userRatings.values()) {
                    if (r == testRating)
                        continue;
                    if (r.getRating() > Dataset.median)
                        pos++;
                    else
                        neg++;
                }
                itemPos.put(item, pos);
                itemNeg.put(item, neg);
            }
            ratingItemPos.clear(); // to save memory
            ratingItemNeg.clear(); // to save memory
            ratingItemPos.put(testRating, itemPos);
            ratingItemNeg.put(testRating, itemNeg);
        }
        // singularity of agreement: rarer opinions are more informative
        List<Double> posSing = new ArrayList<>();
        List<Double> negSing = new ArrayList<>();
        int numUsers = userRatingsMap.keySet().size();
        for (String item : items) {
            double pos = itemPos.get(item);
            double neg = itemNeg.get(item);
            double ps = 1 - pos / numUsers;
            double ns = 1 - neg / numUsers;
            posSing.add(ps);
            negSing.add(ns);
        }
        similarity = SimUtils.SMSim(as, bs, posSing, negSing);
        // System.out.println("SM sim = " + similarity);
        break;
    default:
        break;
    }
    return similarity;
}
/**
 * Learns the prior distribution over the rating scales from the global
 * scale counts, excluding the test rating itself (leave-one-out), and
 * returns each ratio multiplied by the number of scales.
 */
private List<Double> learnScalePriors(Rating testRating) {
    // Total number of ratings across all scales, minus the test rating.
    double total = 0.0;
    for (int count : Dataset.scaleNum.values())
        total += count;
    if (testRating.getRating() > 0)
        total--;

    int scaleSize = Dataset.scaleSize;
    List<Double> priors = new ArrayList<>(scaleSize);
    for (int i = 0; i < scaleSize; i++) {
        double scale = (i + 1) * Dataset.minScale;
        int count = Dataset.scaleNum.get(scale);
        if (scale == testRating.getRating())
            count--; // leave the test rating out of its own scale
        priors.add(scaleSize * (count / total));
    }
    return priors;
}
}