package happy.research.cf;
import happy.coding.io.FileIO;
import happy.coding.io.Logs;
import happy.coding.io.Strings;
import happy.coding.math.Randoms;
import happy.coding.math.Sims;
import happy.coding.math.Stats;
import happy.coding.system.Debug;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import de.bwaldvogel.liblinear.Feature;
import de.bwaldvogel.liblinear.FeatureNode;
import de.bwaldvogel.liblinear.Linear;
import de.bwaldvogel.liblinear.Model;
import de.bwaldvogel.liblinear.Parameter;
import de.bwaldvogel.liblinear.Problem;
import de.bwaldvogel.liblinear.SolverType;
/**
* Implementation of the paper "A Multi-aspect Trust-aware Recommender System:
* Explore the Relationship between Trust and User Similarity"
*
* by Hui et al. (2013)
*
* @author guoguibing
*/
public class MATrust_mt extends DefaultCF_mt {
// Logistic-regression model assigned by train_model(); null until training has run.
private Model model;
// Per-user integrity scores as returned by integrity(); assigned by train_model().
private Map<String, Double> user_ins;
// Threshold passed as the epsilon argument of competence().
private static final double epsilon = 1.2;
// Threshold passed as the theta argument of predictability().
private static final double theta = 1.2;
/**
 * Creates the recommender with no trained model and no integrity scores; both are filled in
 * later by train_model().
 */
public MATrust_mt() {
    this.model = null;
    this.user_ins = null;
    methodId = "MultiAspect Trust";
}
/**
 * Benevolence feature for the user pair (a, b): the value of {@code Sims.pcc} computed over the
 * ratings the two users gave to the items both of them rated.
 *
 * @param a id of the first user
 * @param b id of the second user
 * @return the value returned by {@code Sims.pcc} for the co-rated items, or {@code Double.NaN}
 *         when either user has no ratings
 */
protected double benevolence(String a, String b) {
    Map<String, Rating> ratingsOfA = userRatingsMap.get(a);
    Map<String, Rating> ratingsOfB = userRatingsMap.get(b);
    boolean aHasNone = ratingsOfA == null || ratingsOfA.isEmpty();
    boolean bHasNone = ratingsOfB == null || ratingsOfB.isEmpty();
    if (aHasNone || bHasNone) {
        return Double.NaN;
    }

    // Paired rating vectors, aligned by item, restricted to the co-rated items.
    List<Double> valuesOfA = new ArrayList<>();
    List<Double> valuesOfB = new ArrayList<>();
    for (Entry<String, Rating> rated : ratingsOfA.entrySet()) {
        String item = rated.getKey();
        if (!ratingsOfB.containsKey(item)) {
            continue;
        }
        valuesOfA.add(rated.getValue().getRating());
        valuesOfB.add(ratingsOfB.get(item).getRating());
    }
    return Sims.pcc(valuesOfA, valuesOfB);
}
/**
 * Competence feature for the user pair (a, b).
 * <p>
 * The result is the product of two factors:
 * <ul>
 * <li>an experience ratio {@code min(1, |ratings of b| / |ratings of a|)};</li>
 * <li>the fraction of ratings by other users, on the items {@code b} rated, that differ from
 * {@code b}'s own rating by less than {@code epsilon}.</li>
 * </ul>
 *
 * @param a       id of the first user
 * @param b       id of the user whose ratings are compared against everyone else's
 * @param epsilon maximum absolute rating difference (exclusive) that counts as agreement
 * @return the competence value, or {@code Double.NaN} when either user has no ratings or when
 *         no other user rated any of {@code b}'s items
 */
protected double competence(String a, String b, double epsilon) {
    Map<String, Rating> asRatings = userRatingsMap.get(a);
    Map<String, Rating> bsRatings = userRatingsMap.get(b);
    if (asRatings == null || asRatings.isEmpty())
        return Double.NaN;
    if (bsRatings == null || bsRatings.isEmpty())
        return Double.NaN;

    // Experience ratio, capped at 1 when b has rated more items than a.
    double ratio = Math.min(1.0, (bsRatings.size() + 0.0) / asRatings.size());

    int agreeing = 0;
    int compared = 0;
    for (Entry<String, Rating> en : bsRatings.entrySet()) {
        Map<String, Rating> ratingsOfItem = itemRatingsMap.get(en.getKey());
        // An item missing from the item index has nobody to compare against; skip it instead
        // of failing with a NullPointerException.
        if (ratingsOfItem == null)
            continue;

        double bsRating = en.getValue().getRating();
        for (Rating other : ratingsOfItem.values()) {
            if (b.equals(other.getUserId()))
                continue;
            if (Math.abs(bsRating - other.getRating()) < epsilon)
                agreeing++;
            compared++;
        }
    }

    // No comparison was possible: the feature is undefined (this is what 0.0 / 0 yields too).
    if (compared == 0)
        return Double.NaN;
    return ratio * ((agreeing + 0.0) / compared);
}
/**
 * Computes the integrity feature of every user that appears as a key of {@code userTNsMap} or
 * {@code userDNsMap}: the value of {@code Sims.pcc} between the per-item average ratings and the
 * user's own ratings on the items the user rated.
 *
 * @return map from user id to integrity; users without ratings, or whose correlation is NaN,
 *         are left out
 */
protected Map<String, Double> integrity() {
    // Average rating of every item, wrapped in a Rating attributed to a synthetic user.
    Map<String, Rating> itemAverages = new HashMap<>();
    for (Entry<String, Map<String, Rating>> perItem : itemRatingsMap.entrySet()) {
        double total = 0;
        int howMany = 0;
        for (Rating given : perItem.getValue().values()) {
            total += given.getRating();
            howMany++;
        }
        Rating average = new Rating();
        average.setUserId("average_user");
        average.setItemId(perItem.getKey());
        average.setRating(total / howMany);
        itemAverages.put(perItem.getKey(), average);
    }

    Map<String, Double> scores = new HashMap<>();
    addIntegrityScores(userTNsMap.keySet(), itemAverages, scores);
    addIntegrityScores(userDNsMap.keySet(), itemAverages, scores);
    return scores;
}

/** Adds the integrity score of each listed user that is not yet present in {@code scores}. */
private void addIntegrityScores(Iterable<String> users, Map<String, Rating> itemAverages,
        Map<String, Double> scores) {
    for (String user : users) {
        if (scores.containsKey(user)) {
            continue;
        }
        Map<String, Rating> ownRatings = userRatingsMap.get(user);
        if (ownRatings == null || ownRatings.isEmpty()) {
            continue;
        }

        // Item averages and the user's ratings, aligned by item.
        List<Double> averages = new ArrayList<>();
        List<Double> own = new ArrayList<>();
        for (Entry<String, Rating> rated : ownRatings.entrySet()) {
            String item = rated.getKey();
            if (!itemAverages.containsKey(item)) {
                continue;
            }
            averages.add(itemAverages.get(item).getRating());
            own.add(rated.getValue().getRating());
        }

        double correlation = Sims.pcc(averages, own);
        if (!Double.isNaN(correlation)) {
            scores.put(user, correlation);
        }
    }
}
/**
 * Predictability feature for the user pair (a, b).
 * <p>
 * Every co-rated item is put into one of three buckets according to {@code ra - rb}: within
 * {@code [-theta, theta]}, above {@code theta}, or below {@code -theta}. The result is the
 * difference between the first elements returned by {@code Stats.max} and {@code Stats.min} for
 * the three bucket sizes, divided by the number of co-rated items.
 *
 * @param a     id of the first user
 * @param b     id of the second user
 * @param theta half-width of the band in which two ratings count as neither higher nor lower
 * @return the predictability value; {@code Double.NaN} when either user has no ratings or the
 *         users share no item
 */
protected double predictability(String a, String b, double theta) {
    Map<String, Rating> ratingsOfA = userRatingsMap.get(a);
    Map<String, Rating> ratingsOfB = userRatingsMap.get(b);
    boolean aHasNone = ratingsOfA == null || ratingsOfA.isEmpty();
    boolean bHasNone = ratingsOfB == null || ratingsOfB.isEmpty();
    if (aHasNone || bHasNone) {
        return Double.NaN;
    }

    // buckets[0]: |ra - rb| within theta, buckets[1]: ra - rb > theta, buckets[2]: ra - rb < -theta
    int[] buckets = new int[3];
    int shared = 0;
    for (Entry<String, Rating> rated : ratingsOfA.entrySet()) {
        String item = rated.getKey();
        if (!ratingsOfB.containsKey(item)) {
            continue;
        }
        double diff = rated.getValue().getRating() - ratingsOfB.get(item).getRating();
        int slot;
        if (diff > theta) {
            slot = 1;
        } else if (diff < -theta) {
            slot = 2;
        } else {
            slot = 0;
        }
        buckets[slot]++;
        shared++;
    }

    // Each helper gets its own copy of the counts, exactly as separate arrays would be passed.
    int largest = Stats.max(buckets.clone())[0];
    int smallest = Stats.min(buckets.clone())[0];
    return (largest - smallest + 0.0) / shared;
}
/**
 * Counts the items rated by both users.
 *
 * @param a id of the first user
 * @param b id of the second user
 * @return number of co-rated items; 0 when either user has no entry in {@code userRatingsMap}
 */
protected int common_experience(String a, String b) {
    Map<String, Rating> ratingsOfA = userRatingsMap.get(a);
    Map<String, Rating> ratingsOfB = userRatingsMap.get(b);

    int shared = 0;
    if (ratingsOfA != null && ratingsOfB != null) {
        for (String item : ratingsOfA.keySet()) {
            shared += ratingsOfB.containsKey(item) ? 1 : 0;
        }
    }
    return shared;
}
/**
 * Trains the logistic-regression model that maps the four trust features (benevolence,
 * competence, integrity, predictability) to a trust label and stores it in {@code model}.
 * <p>
 * Every user that has both a trust list and a distrust list contributes one positive instance
 * (label 1.0) per trusted neighbour and one negative instance (label 0.0) per distrusted
 * neighbour. Features whose value is NaN are omitted from the instance. The integrity scores
 * are recomputed and cached in {@code user_ins} as a side effect.
 * <p>
 * Instances are collected in growable lists, so the number of trust/distrust pairs is not
 * limited by a fixed buffer size. Each instance carries an explicit bias node (index
 * {@code prob.n}, value {@code prob.bias}): LIBLINEAR does not append it itself, and without it
 * the bias weight never receives a data gradient.
 *
 * @throws Exception if training or saving the model fails
 */
protected void train_model() throws Exception {
    this.user_ins = integrity();
    Logs.debug("Training logistic model ...");

    List<double[]> features = new ArrayList<>();
    List<Double> labels = new ArrayList<>();
    int num_pos = 0;
    int num_neg = 0;
    for (String a : userTNsMap.keySet()) {
        if (!userDNsMap.containsKey(a))
            continue;
        // positive instances first, then negative ones, for every eligible user
        num_pos += collectTrainingInstances(a, userTNsMap.get(a), 1.0, features, labels);
        num_neg += collectTrainingInstances(a, userDNsMap.get(a), 0.0, features, labels);
    }
    Logs.info("positive intances: " + num_pos + ", negative instances: "
            + num_neg);

    int numInstances = features.size();
    Problem prob = new Problem();
    prob.l = numInstances; // number of training examples
    prob.n = 5; // number of features + bias
    prob.bias = 1; // the value of bias
    prob.y = new double[numInstances];
    prob.x = new FeatureNode[numInstances][];
    for (int k = 0; k < numInstances; k++) {
        prob.y[k] = labels.get(k);
        prob.x[k] = toTrainingNodes(features.get(k), prob.n, prob.bias);
    }

    SolverType solver = SolverType.L2R_LR; // -s 0
    double C = 1.0; // cost of constraints violation
    double eps = 0.001; // stopping criteria
    Parameter param = new Parameter(solver, C, eps);
    Model trained = Linear.train(prob, param);

    if (Debug.ON) {
        String dir = Dataset.DIRECTORY + "Models/";
        FileIO.makeDirectory(dir);
        File modelFile = new File(dir + "model.txt");
        trained.save(modelFile);
    }
    this.model = trained;

    Logs.info("Learned features weights:"
            + Strings.toString(trained.getFeatureWeights()));
    Logs.debug("Done!");
}

/**
 * Appends one feature row and one label per neighbour of {@code a} (self-links are skipped) and
 * returns how many instances were added.
 */
private int collectTrainingInstances(String a, Map<String, Double> neighbours, double label,
        List<double[]> features, List<Double> labels) {
    int added = 0;
    for (String b : neighbours.keySet()) {
        if (a.equals(b))
            continue;
        double in = user_ins.containsKey(b) ? user_ins.get(b) : Double.NaN;
        features.add(new double[] { benevolence(a, b), competence(a, b, epsilon), in,
                predictability(a, b, theta) });
        labels.add(label);
        added++;
    }
    return added;
}

/**
 * Converts a feature row into sparse LIBLINEAR nodes (1-based indexes, NaN values dropped) and
 * appends the bias node when {@code bias >= 0}.
 */
private FeatureNode[] toTrainingNodes(double[] values, int biasIndex, double bias) {
    List<FeatureNode> nodes = new ArrayList<>();
    for (int p = 0; p < values.length; p++) {
        if (!Double.isNaN(values[p]))
            nodes.add(new FeatureNode(p + 1, values[p]));
    }
    // The bias index is the largest one, so ascending index order is preserved.
    if (bias >= 0)
        nodes.add(new FeatureNode(biasIndex, bias));
    return nodes.toArray(new FeatureNode[0]);
}
/**
 * Combines a subset of the features into a single value in (0, 1) by applying the logistic
 * function to the weighted sum of the selected features plus the bias weight.
 *
 * @param ws      weights; {@code ws[i]} belongs to feature {@code i} and {@code ws[4]} is used as
 *                the bias weight
 * @param fs      feature values; NaN entries are ignored
 * @param indexes positions of the features to include
 * @return {@code 1 / (1 + exp(-(sum + ws[4])))}
 */
private double combine_features(double[] ws, double[] fs, int... indexes) {
    double score = 0;
    for (int idx : indexes) {
        double feature = fs[idx];
        if (Double.isNaN(feature)) {
            continue;
        }
        score += ws[idx] * feature;
    }

    // the bias term enters with a constant input of 1
    score += ws[4] * 1.0;

    // squash the linear score into a trust value
    return 1.0 / (1.0 + Math.exp(-score));
}
/**
 * Trust values are re-generated based on different features<br/>
 * Distrust values are not re-generated as we will not use them in our work.
 * <p>
 * For every (user, trusted neighbour) pair the four features are computed and then combined,
 * with the learned weights, for each of the 15 non-empty feature subsets (4 single, 6 pairs,
 * 3 triples... i.e. 4 triples, and the full set). Each result is handed to
 * {@code output_trust} as the line {@code "a b value"} together with the subset used.
 * <p>
 * The subsets are enumerated from a fixed table in ascending index order, so the same subset is
 * always passed with the same index order, independently of any random generator.
 * <p>
 * NOTE(review): the weights are interpreted as scoring the trust class; confirm that the
 * trained model's weight orientation corresponds to label 1.0 for this data.
 *
 * @throws IllegalStateException if {@code train_model()} has not been run yet
 * @throws Exception             if writing the output fails
 */
protected void gen_trust() throws Exception {
    if (model == null || user_ins == null)
        throw new IllegalStateException("train_model() must be called before gen_trust()");

    double[] ws = model.getFeatureWeights();

    // Feature subsets in output order: singles, pairs, triples (each leaving out 0, 1, 2, 3 in
    // turn), then all four.
    int[][] subsets = {
            { 0 }, { 1 }, { 2 }, { 3 },
            { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 2 }, { 1, 3 }, { 2, 3 },
            { 1, 2, 3 }, { 0, 2, 3 }, { 0, 1, 3 }, { 0, 1, 2 },
            { 0, 1, 2, 3 } };

    Logs.debug("Predict trust values ...");
    for (String a : userTNsMap.keySet()) {
        Map<String, Double> tns = userTNsMap.get(a);
        for (Entry<String, Double> en : tns.entrySet()) {
            String b = en.getKey();
            if (a.equals(b))
                continue;

            double in = user_ins.containsKey(b) ? user_ins.get(b) : Double.NaN;
            double[] fs = { benevolence(a, b), competence(a, b, epsilon), in,
                    predictability(a, b, theta) };

            String line = a + " " + b;
            for (int[] indexes : subsets) {
                double val = combine_features(ws, fs, indexes);
                output_trust(line + " " + val, indexes);
            }
        }
    }
    Logs.debug("Done!");
}
/**
 * Hands one line to {@code FileIO.writeString} for the file {@code trust.txt} inside a
 * directory named after the feature indexes, e.g. {@code Trust/0_2/} for indexes 0 and 2, below
 * {@code Dataset.DIRECTORY}. The directory is requested through {@code FileIO.makeDirectory}
 * on every call.
 *
 * @param line    text to write
 * @param indexes feature indexes that identify the target directory, joined with "_"
 * @throws Exception if the directory or file operation fails
 */
private void output_trust(String line, int... indexes) throws Exception {
    StringBuilder path = new StringBuilder();
    path.append(Dataset.DIRECTORY).append("Trust/");
    for (int pos = 0; pos < indexes.length; pos++) {
        if (pos > 0) {
            path.append("_");
        }
        path.append(indexes[pos]);
    }
    path.append("/");

    String dir = path.toString();
    FileIO.makeDirectory(dir);
    FileIO.writeString(dir + "trust.txt", line, true);
}
/**
 * This stub method is used for initially testing (previous)
 * <p>
 * It walks the users that have both a trust and a distrust list, uses the first 199 of them to
 * build a training set and the following ones (up to the 300th) to build a test set, trains a
 * logistic-regression model and prints, for 20 decision thresholds, how many test instances
 * were classified correctly. The {@code model} and {@code user_ins} fields are not modified.
 *
 * @throws Exception
 */
protected void train_model_stub() throws Exception {
/**
 * training settings
 */
// NOTE(review): both buffers are fixed at 20000 rows and the write index is never checked;
// more instances than that would raise ArrayIndexOutOfBoundsException.
int i = 0, s = 20000;
int num_pos = 0;
int num_neg = 0;
double[][] X = new double[s][4];
double[] Y = new double[s];
/**
 * testing settings
 */
int i_test = 0, s_test = 20000;
int test_pos = 0;
int test_neg = 0;
double[][] X_test = new double[s_test][4];
double[] Y_test = new double[s_test];
// number of eligible users seen so far
int count = 0;
// local variable: shadows the user_ins field, which is left untouched
Map<String, Double> user_ins = integrity();
for (String a : userTNsMap.keySet()) {
// only users with both trust and distrust lists are eligible
if (!userDNsMap.containsKey(a))
continue;
// prepare for the training data
count++;
// stop after 300 eligible users
if (count > 300)
break;
// eligible users 1..199 feed the training set, 200..300 the test set
if (count < 200) {
// positive instances
Map<String, Double> tns = userTNsMap.get(a);
for (Entry<String, Double> en : tns.entrySet()) {
String b = en.getKey();
if (a.equals(b))
continue;
double be = benevolence(a, b);
double co = competence(a, b, epsilon);
double in = Double.NaN;
if (user_ins.containsKey(b))
in = user_ins.get(b);
double pr = predictability(a, b, theta);
X[i][0] = be;
X[i][1] = co;
X[i][2] = in;
X[i][3] = pr;
Y[i] = 1.0;
i++;
num_pos++;
}
// negative instances
Map<String, Double> dns = userDNsMap.get(a);
for (Entry<String, Double> en : dns.entrySet()) {
String b = en.getKey();
if (a.equals(b))
continue;
double be = benevolence(a, b);
double co = competence(a, b, epsilon);
double in = Double.NaN;
if (user_ins.containsKey(b))
in = user_ins.get(b);
double pr = predictability(a, b, theta);
X[i][0] = be;
X[i][1] = co;
X[i][2] = in;
X[i][3] = pr;
Y[i] = 0.0;
i++;
num_neg++;
}
// System.out.println("count=" + count);
}
else {
// Logs.debug("Collecting data for testing process ...");
// positive test instances
Map<String, Double> tns = userTNsMap.get(a);
for (Entry<String, Double> en : tns.entrySet()) {
String b = en.getKey();
if (a.equals(b))
continue;
double be = benevolence(a, b);
double co = competence(a, b, epsilon);
double in = Double.NaN;
if (user_ins.containsKey(b))
in = user_ins.get(b);
double pr = predictability(a, b, theta);
X_test[i_test][0] = be;
X_test[i_test][1] = co;
X_test[i_test][2] = in;
X_test[i_test][3] = pr;
Y_test[i_test] = 1.0;
i_test++;
test_pos++;
}
// negative instances
Map<String, Double> dns = userDNsMap.get(a);
for (Entry<String, Double> en : dns.entrySet()) {
String b = en.getKey();
if (a.equals(b))
continue;
double be = benevolence(a, b);
double co = competence(a, b, epsilon);
double in = Double.NaN;
if (user_ins.containsKey(b))
in = user_ins.get(b);
double pr = predictability(a, b, theta);
X_test[i_test][0] = be;
X_test[i_test][1] = co;
X_test[i_test][2] = in;
X_test[i_test][3] = pr;
Y_test[i_test] = 0.0;
i_test++;
test_neg++;
}
}
}
Logs.info("positive intance: " + num_pos + ", negative instance: "
+ num_neg);
Logs.info("positive intance for test: " + test_pos
+ ", negative instance for test: " + test_neg);
// Logs.debug("Applying logistic regression ...");
// testing setting
int total = i_test;
// int test_num_neg = 0, test_num_pos = 0, test_neg1 = test_neg,
// test_pos1 = test_pos;
Feature[][] test_xs = new FeatureNode[total][];
double[] test_ys = new double[total];
// training setting
// NOTE(review): prob.n counts a bias feature, but no node with index 5 is added to the
// instances below -- confirm whether the library expects the caller to append it.
Problem prob = new Problem();
prob.l = i; // number of training examples
prob.n = 5; // number of features + bias
prob.bias = 1;// the value of bias
prob.y = new double[i];
for (int k = 0; k < i; k++)
prob.y[k] = Y[k];
prob.x = new FeatureNode[i][];
// train data: sparse rows with 1-based feature indexes, NaN features left out
for (int k = 0; k < i; k++) {
List<FeatureNode> fns = new ArrayList<>();
for (int p = 0; p < 4; p++) {
if (!Double.isNaN(X[k][p])) {
fns.add(new FeatureNode(p + 1, X[k][p]));
}
}
prob.x[k] = new FeatureNode[fns.size()];
for (int m = 0; m < fns.size(); m++) {
prob.x[k][m] = fns.get(m);
}
}
// test data: same sparse encoding as the training rows
for (int k = 0; k < i_test; k++) {
test_ys[k] = Y_test[k];
List<FeatureNode> fns = new ArrayList<>();
for (int p = 0; p < 4; p++) {
if (!Double.isNaN(X_test[k][p])) {
fns.add(new FeatureNode(p + 1, X_test[k][p]));
}
}
test_xs[k] = new FeatureNode[fns.size()];
for (int m = 0; m < fns.size(); m++) {
test_xs[k][m] = fns.get(m);
}
}
// total = test_num_pos + test_num_neg;
SolverType solver = SolverType.L2R_LR; // -s 0
double C = 1.0; // cost of constraints violation
double eps = 0.001; // stopping criteria
Parameter param = new Parameter(solver, C, eps);
Model model = Linear.train(prob, param);
if (Debug.ON) {
// relative path: unlike train_model(), Dataset.DIRECTORY is not prepended here
String dir = "Models/";
FileIO.makeDirectory(dir);
// File modelFile = new File(dir + "model_" + a + ".txt");
File modelFile = new File(dir + "model.txt");
model.save(modelFile);
// load model or use it directly
model = Model.load(modelFile);
}
// one counter per threshold index t, where the threshold is 0.05 * t for t = 0..19
int correct[] = new int[20];
int correct_trust[] = new int[20];
int correct_distrust[] = new int[20];
for (int k = 0; k < total; k++) {
Feature[] instance = test_xs[k];
double[] prob_estimates = new double[2];
double label = Linear.predictProbability(model, instance,
prob_estimates);
if (Debug.OFF) {
// with the counter commented out, the debug log is the body of this if
if (test_ys[k] == label)
// correct++;
Logs.debug("(" + test_ys[k] + ", " + label + ": "
+ Strings.toString(prob_estimates) + ")");
} else {
// NOTE(review): index 0 is treated as the trust probability -- confirm which label
// the library reports at position 0 for this model.
double estimate = prob_estimates[0];
// double threshold = (num_pos + 0.0) / (num_pos + num_neg);
// re-label the instance at every threshold and count the matches with the true label
for (int t = 0; 0.05 * t < 1; t++) {
if (estimate > 0.05 * t)
label = 1.0;
else
label = 0.0;
if (test_ys[k] == label) {
correct[t]++;
if (label == 1)
correct_trust[t]++;
}
}
}
}
// report per threshold; note that the printed "threshold" is the index j, not 0.05 * j
for (int j = 0; j < 20; j++) {
correct_distrust[j] = correct[j] - correct_trust[j];
double accuracy = (correct[j] + 0.0) / total;
System.out.println("threshold=" + j
+ ": the number of correct trust prediction="
+ correct_trust[j] + "; the number of distrust prediction="
+ correct_distrust[j]);
System.out.println("accuracy=" + accuracy);
}
}
/**
 * Runs the two stages of this method in order: train the trust model, then write the
 * re-generated trust values.
 *
 * @return always {@code null}; this method produces files rather than a performance result
 * @throws Exception if training or output generation fails
 */
@Override
protected Performance runMultiThreads() throws Exception {
    // stage 1: learn the feature weights
    this.train_model();

    // stage 2: emit trust values for every feature subset
    this.gen_trust();

    return null;
}
}