package happy.research.cf;
import happy.coding.io.FileIO;
import happy.coding.io.KeyValPair;
import happy.coding.io.Lists;
import happy.coding.io.Logs;
import happy.coding.io.Strings;
import happy.coding.math.Sims;
import happy.coding.system.Debug;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
public class CBF_mt extends DefaultCF_mt
{
//format: {item: {tag: count}}
public static Map<String, Map<String, Integer>> itemTags;
public static List<String> tags;
//format: {item: {tag: tf}}
public static Map<String, Map<String, Double>> itemVecs;
//format: {tag: df}
public static Map<String, Double> tagIDFs;
public CBF_mt()
{
methodId = "Content-based CF";
}
@Override
protected void loadDataset() throws Exception
{
Dataset.RATING_SET = "ratings.csv";
Dataset.REGMX = ",";
super.load_ratings();
// load tags
load_tags();
}
private void load_tags() throws Exception
{
String file = "movie-tags.csv";
if (itemTags == null)
{
itemTags = new HashMap<>();
String path = FileIO.makeDirPath(Dataset.DIRECTORY, file);
Logs.debug("Load item's tags from {}", Strings.shortStr(path));
tags = new ArrayList<>();
BufferedReader br = new BufferedReader(new FileReader(new File(path)));
String line = null;
while ((line = br.readLine()) != null)
{
String[] data = line.split(",");
String item = data[0];
String tag = data[1]; //.toLowerCase().replace(" ", "");
if (!tags.contains(tag)) tags.add(tag);
Map<String, Integer> innerMap = null;
if (itemTags.containsKey(item)) innerMap = itemTags.get(item);
else innerMap = new HashMap<>();
int count = 0;
if (innerMap.containsKey(tag)) count = innerMap.get(tag);
count++;
innerMap.put(tag, count);
itemTags.put(item, innerMap);
}
br.close();
}
}
@Override
protected Performance runMultiThreads() throws Exception
{
boolean weighted = !true;
String dir = "D:\\Dropbox\\PhD\\My Work\\Algorithms\\@Machine Learning\\RecSys\\Assignments\\A3\\";
String file = "unweighted.txt";
if (weighted) file = "weighted.txt";
file = dir + file;
FileIO.deleteFile(file);
compItemVectors();
String[] users = {/* examples */"4045", "144", "3855", "1637", "2919",
/*tests*/"4934", "3511", "4835", "3362", "1270" };
for (int k = 0; k < users.length; k++)
{
String user = users[k];
Map<String, Rating> itemRatings = userRatingsMap.get(user);
double mean = RatingUtils.mean(itemRatings, null);
// build user profile
// {tag: score}
Map<String, Double> userVec = new HashMap<>();
for (String tag : tags)
userVec.put(tag, 0.0);
for (Entry<String, Rating> en : itemRatings.entrySet())
{
String item = en.getKey();
double rate = en.getValue().getRating();
double temp = rate;
if (weighted)
{
// since we need to sum all items rather than all positively rated items
rate = Dataset.maxScale;
}
if (rate >= 3.5)
{
Map<String, Double> itemVec = itemVecs.get(item);
for (Entry<String, Double> en2 : itemVec.entrySet())
{
String tag = en2.getKey();
double score = en2.getValue();
if (weighted)
{
score *= (temp - mean);
}
score += userVec.get(tag);
userVec.put(tag, score);
}
}
}
// DO Recommendations: {item: reco-socres}
Map<String, Double> itemScores = new HashMap<>();
for (String item : itemVecs.keySet())
{
if (itemRatings.containsKey(item)) continue;
List<Double> us = new ArrayList<>();
List<Double> vs = new ArrayList<>();
Map<String, Double> vec = itemVecs.get(item);
if (Debug.OFF)
{
for (Entry<String, Double> en3 : vec.entrySet())
{
String tag = en3.getKey();
double score = en3.getValue();
if (userVec.containsKey(tag))
{
us.add(userVec.get(tag));
vs.add(score);
}
}
} else
{
for (String tag : tags)
{
us.add(userVec.get(tag));
if (vec.containsKey(tag)) vs.add(vec.get(tag));
else vs.add(0.0);
}
}
double score = Sims.cos(us, vs);
itemScores.put(item, score);
}
List<KeyValPair<String>> pairs = Lists.sortMap(itemScores, true);
StringBuilder sb = new StringBuilder();
sb.append("recommendations for user " + user + ":\n");
for (int i = 0; i < 5; i++)
{
KeyValPair<String> pair = pairs.get(i);
sb.append(" " + pair.getKey() + ": " + Strings.toString(pair.getValue(), 4) + "\n");
}
if (k < 5) System.out.println(sb.toString());
else FileIO.writeString(file, sb.toString(), true);
}
return null;
}
private void compItemVectors()
{
// total items
int numItems = itemTags.size();
// build tag - IDF
tagIDFs = new HashMap<>();
for (Entry<String, Map<String, Integer>> en : itemTags.entrySet())
{
for (String tag : en.getValue().keySet())
{
double cnt = 0;
if (tagIDFs.containsKey(tag)) cnt = tagIDFs.get(tag);
cnt += 1;
tagIDFs.put(tag, cnt);
}
}
for (String tag : tagIDFs.keySet())
{
double cnt = tagIDFs.get(tag);
double df = numItems / cnt;
if (Debug.OFF)
{
// in fact, it usually is 1+cnt, rather than cnt
df = numItems / (1.0 + cnt);
}
tagIDFs.put(tag, Math.log(df));
}
// build item - TF-IDF
itemVecs = new HashMap<>();
for (String item : itemTags.keySet())
{
Map<String, Integer> tagCnts = itemTags.get(item);
Map<String, Double> tagVec = new HashMap<>();
double squareSum = 0;
for (Entry<String, Integer> en : tagCnts.entrySet())
{
String tag = en.getKey();
int cnt = en.getValue();
// typical way: tf = Math.log(1 + cnt);
// simple way: tf = cnt
double val = cnt * tagIDFs.get(tag);
tagVec.put(tag, val);
squareSum += val * val;
}
// normalization
double norm = Math.sqrt(squareSum);
for (String tag : tagVec.keySet())
tagVec.put(tag, tagVec.get(tag) / norm);
itemVecs.put(item, tagVec);
}
}
}