package happy.research.cf;
import happy.coding.io.FileIO;
import happy.coding.io.FileIO.Converter;
import happy.coding.io.FileIO.MapWriter;
import happy.coding.io.Lists;
import happy.coding.io.Logs;
import happy.coding.math.Randoms;
import happy.coding.math.Stats;
import happy.coding.system.Systems;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.junit.Ignore;
import org.junit.Test;
public class DatasetUtils
{
/**
 * Splits a list into {@code num} consecutive chunks, preserving element order.
 * <p>
 * The chunks are balanced: their sizes differ by at most one, and the larger
 * chunks come first. A chunk is empty only when there are fewer elements than
 * chunks.
 *
 * @param data
 *            the elements to distribute; not modified
 * @param num
 *            the number of chunks to produce; must be positive
 * @return an array of {@code num} independent lists that together contain every
 *         element of {@code data} exactly once, in the original order
 * @throws IllegalArgumentException
 *             if {@code num} is not positive
 */
@SuppressWarnings("unchecked")
public static <E> List<E>[] splitCollection(List<E> data, int num)
{
    if (num <= 0) throw new IllegalArgumentException("num must be positive, but was " + num);
    int size = data.size();
    int base = size / num;
    // the first 'extra' chunks take one additional element each
    int extra = size % num;
    List<E>[] chunks = new List[num];
    int from = 0;
    for (int i = 0; i < num; i++)
    {
        int len = base + (i < extra ? 1 : 0);
        // copy the view so every chunk is independent of the source list
        chunks[i] = new ArrayList<>(data.subList(from, from + len));
        from += len;
    }
    return chunks;
}
/**
 * Loads a trust data set and indexes it three ways.
 * <p>
 * Each non-blank line is split with {@code Dataset.REGMX} into
 * {@code trustor, trustee, rating}. Self-referencing entries are kept. The
 * parsed rating is stored only in the {@link TrustRating} objects; the two
 * neighbour maps always store {@code 1.0} as the value.
 *
 * @param trust_data_set
 *            path of the trust file
 * @return Map[]{userTrusteesMap, userTrustorsMap, userTrustRatingsMap}, i.e.
 *         {trustor -> {trustee -> 1.0}}, {trustee -> {trustor -> 1.0}} and
 *         {trustee -> trust ratings received by that trustee}
 * @throws Exception
 *             if the file cannot be read or a line cannot be parsed
 */
@SuppressWarnings({ "rawtypes" })
public static Map[] loadTrustSet2(String trust_data_set) throws Exception
{
    Map<String, Map<String, Double>> userTrusteesMap = new HashMap<>();
    Map<String, Map<String, Double>> userTrustorsMap = new HashMap<>();
    Map<String, List<TrustRating>> userTrustRatingsMap = new HashMap<>();
    // try-with-resources closes the reader even when a line fails to parse
    try (BufferedReader fr = new BufferedReader(new FileReader(trust_data_set)))
    {
        String line;
        while ((line = fr.readLine()) != null)
        {
            if (line.trim().isEmpty()) continue;
            String[] data = line.split(Dataset.REGMX);
            String trustor = data[0];
            String trustee = data[1];
            double rating = Double.parseDouble(data[2]);

            TrustRating tr = new TrustRating();
            tr.setRating(rating);
            tr.setTrustee(trustee);
            tr.setTrustor(trustor);

            // {trustee - ratings on trustee}
            List<TrustRating> trs = userTrustRatingsMap.get(trustee);
            if (trs == null)
            {
                trs = new ArrayList<>();
                userTrustRatingsMap.put(trustee, trs);
            }
            trs.add(tr);

            // {trustor - trusted neighbours}
            Map<String, Double> tees = userTrusteesMap.get(trustor);
            if (tees == null)
            {
                tees = new HashMap<>();
                userTrusteesMap.put(trustor, tees);
            }
            tees.put(trustee, 1.0);

            // {trustee - trustors on the trustee}
            Map<String, Double> tors = userTrustorsMap.get(trustee);
            if (tors == null)
            {
                tors = new HashMap<>();
                userTrustorsMap.put(trustee, tors);
            }
            tors.put(trustor, 1.0);
        }
    }
    return new Map[] { userTrusteesMap, userTrustorsMap, userTrustRatingsMap };
}
/**
 * Loads a space-separated trust file ({@code trustor trustee score} per line)
 * indexed by trustee.
 * <p>
 * Blank lines and self-referencing entries (trustor equals trustee) are
 * skipped. If a pair occurs more than once, the last score wins.
 *
 * @param trustSet
 *            path of the trust file
 * @return {trustee -> {trustor -> trust score}}
 * @throws Exception
 *             if the file cannot be read or a line cannot be parsed
 */
public static Map<String, Map<String, Double>> loadTrusteeSet(String trustSet) throws Exception
{
    Map<String, Map<String, Double>> userTrustorsMap = new HashMap<>();
    // try-with-resources closes the reader even when a line fails to parse
    try (BufferedReader fr = new BufferedReader(new FileReader(trustSet)))
    {
        String line;
        while ((line = fr.readLine()) != null)
        {
            if (line.trim().isEmpty()) continue;
            String[] data = line.split(" ");
            String trustor = data[0];
            String trustee = data[1];
            double trustScore = Double.parseDouble(data[2]);
            if (trustee.equals(trustor)) continue; // to remove self-indicate entry

            Map<String, Double> trustors = userTrustorsMap.get(trustee);
            if (trustors == null)
            {
                trustors = new HashMap<>();
                userTrustorsMap.put(trustee, trustors);
            }
            trustors.put(trustor, trustScore);
        }
    }
    return userTrustorsMap;
}
/**
 * Loads a space-separated trust file ({@code trustor trustee score} per line)
 * indexed by trustor.
 * <p>
 * Blank lines and self-referencing entries (trustor equals trustee) are
 * skipped. If a pair occurs more than once, the last score wins.
 *
 * @param trustSet
 *            path of the trust file
 * @return {trustor -> {trustee -> trust score}}
 * @throws Exception
 *             if the file cannot be read or a line cannot be parsed
 */
public static Map<String, Map<String, Double>> loadTrustSet(String trustSet) throws Exception
{
    Map<String, Map<String, Double>> userTNsMap = new HashMap<>();
    // try-with-resources closes the reader even when a line fails to parse
    try (BufferedReader fr = new BufferedReader(new FileReader(trustSet)))
    {
        String line;
        while ((line = fr.readLine()) != null)
        {
            if (line.trim().isEmpty()) continue;
            String[] data = line.split(" ");
            String trustor = data[0];
            String trustee = data[1];
            double trustScore = Double.parseDouble(data[2]);
            if (trustee.equals(trustor)) continue; // to remove self-indicate entry

            Map<String, Double> tns = userTNsMap.get(trustor);
            if (tns == null)
            {
                tns = new HashMap<>();
                userTNsMap.put(trustor, tns);
            }
            tns.put(trustee, trustScore);
        }
    }
    return userTNsMap;
}
/**
 * Reads the trust set and the distrust set from a space-separated file whose
 * lines are {@code trustor trustee value}.
 * <p>
 * A value of {@code 1} adds the trustee to the trustor's trust list and a value
 * of {@code -1} adds it to the trustor's distrust list; every other value is
 * ignored. Blank lines and self-referencing entries are skipped.
 *
 * @param trust_data_set
 *            path of the trust file
 * @return Map[]{TrustSet, DistrustSet}, both of the shape
 *         {trustor -> list of trustees}
 * @throws Exception
 *             if the file cannot be read or a line cannot be parsed
 */
@SuppressWarnings("rawtypes")
public static Map[] loadTrustDistrustSets(String trust_data_set) throws Exception
{
    /* Trust Set */
    Map<String, List<String>> usersTNsMap = new HashMap<>();
    /* Distrust Set */
    Map<String, List<String>> usersDTNsMap = new HashMap<>();
    // try-with-resources closes the reader even when a line fails to parse
    try (BufferedReader fr = new BufferedReader(new FileReader(trust_data_set)))
    {
        String line;
        while ((line = fr.readLine()) != null)
        {
            if (line.trim().isEmpty()) continue;
            String[] data = line.split(" ");
            String trustor = data[0];
            String trustee = data[1];
            int value = Integer.parseInt(data[2]);
            if (trustee.equals(trustor)) continue; // to remove self-indicate entry

            Map<String, List<String>> target;
            if (value == 1) target = usersTNsMap;
            else if (value == -1) target = usersDTNsMap;
            else continue; // neither trust nor distrust

            List<String> neighbours = target.get(trustor);
            if (neighbours == null)
            {
                neighbours = new ArrayList<>();
                target.put(trustor, neighbours);
            }
            neighbours.add(trustee);
        }
    }
    return new Map[] { usersTNsMap, usersDTNsMap };
}
/**
 * Rewrites the Extended Epinions {@code rating.txt} into {@code ratings.txt},
 * replacing every item id by the value it maps to in
 * {@code itemId-mappings.txt}.
 * <p>
 * Input lines are split with {@code Dataset.REGMX}; output lines are
 * {@code userId, mappedItemId, rating} joined with the same constant. Output is
 * appended to the target in batches of 1000 lines. An item id that is missing
 * from the mapping is written as the text {@code null}.
 *
 * @throws Exception
 *             if a file cannot be read or written, or a line has fewer than
 *             three fields
 */
public static void convertEpinionsTrust() throws Exception
{
    String dirPath = "D:\\Java\\eclipse\\workspace\\CF-RS\\dataset\\Extended Epinions\\";
    String source = dirPath + "rating.txt";
    String ids = dirPath + "itemId-mappings.txt";
    String target = dirPath + "ratings.txt";
    // try-with-resources closes the reader even when conversion fails midway
    try (BufferedReader br = new BufferedReader(new FileReader(new File(source))))
    {
        Map<String, String> idMap = FileIO.readAsMap(ids);
        List<String> lines = new ArrayList<>();
        String line;
        while ((line = br.readLine()) != null)
        {
            String[] data = line.split(Dataset.REGMX);
            String userId = data[0];
            String itemId = data[1];
            String rating = data[2];
            String iId = idMap.get(itemId);
            lines.add(userId + Dataset.REGMX + iId + Dataset.REGMX + rating);
            // flush in batches to bound memory usage
            if (lines.size() == 1000)
            {
                FileIO.writeList(target, lines, null, true);
                lines.clear();
            }
        }
        if (lines.size() > 0) FileIO.writeList(target, lines, null, true);
    }
}
/**
 * Draws a 6% line sample from the rating data set and appends it to a file on
 * the desktop.
 * <p>
 * The 664,824 source lines are divided into equally wide windows; up to 1000
 * line ids are requested from each window, and the last window takes whatever
 * remains of the sample quota.
 *
 * @throws Exception
 *             if the source cannot be read or the target cannot be written
 */
@Ignore
@Test
public void sampleRatingDataset() throws Exception
{
    ConfigParams.defaultInstance();
    final String source = Dataset.DIRECTORY + Dataset.RATING_SET;
    final String target = Systems.getDesktop() + Dataset.RATING_SET;

    final int max = 664_824;
    final double percent = 0.06;
    final int sample = (int) (max * percent);

    int len = 1000;
    int iteration = sample / len;
    if (sample % len != 0) iteration++;
    final int step = max / iteration;

    for (int i = 0; i < iteration; i++)
    {
        final int start = 1 + step * i;
        final int end;
        if (i != iteration - 1)
        {
            end = start + step;
        } else
        {
            // last window: extend to the end of the file and take the remainder
            end = max;
            len = sample - i * len;
        }
        Logs.debug("len, min, max = {}, {}, {}", new Object[] { len, start, end });

        int[] idsArray = Randoms.nextIntArray(len, start, end);
        FileIO.writeString(target, FileIO.readAsString(source, idsArray), true);
    }
}
/**
 * Converts the tab-separated Flixster trust file into the project format and
 * makes every link bidirectional.
 * <p>
 * For each input pair {@code (a, b)} the two lines {@code a b 1.0} and
 * {@code b a 1.0} are written, with fields joined by {@code Dataset.REGMX}.
 *
 * @throws Exception
 *             if a file cannot be read or written, or an id is not an integer
 */
public static void convertFlixsterTrust() throws Exception
{
    String unformatted_file = "D:\\Dropbox\\Coding\\Java_Projects\\HappyCoding\\dataset\\Flixter\\trust.txt";
    String formatted_file = "C:\\Users\\guoguibing\\Desktop\\trust.txt";
    // try-with-resources closes both streams even when a line fails to parse
    try (BufferedReader br = new BufferedReader(new FileReader(unformatted_file));
            BufferedWriter bw = new BufferedWriter(new FileWriter(formatted_file)))
    {
        String line;
        while ((line = br.readLine()) != null)
        {
            line = line.replace("\t", Dataset.REGMX);
            String[] data = line.split(Dataset.REGMX);
            int trustor = Integer.parseInt(data[0]);
            int trustee = Integer.parseInt(data[1]);
            bw.write(trustor + Dataset.REGMX + trustee + Dataset.REGMX + "1.0" + "\n");
            bw.write(trustee + Dataset.REGMX + trustor + Dataset.REGMX + "1.0" + "\n");
        }
    }
}
/**
 * Converts the Jester ratings file into the project format, mapping the
 * original rating scale onto the values 1.0 to 5.0.
 * <p>
 * Mapping: [6, 10] -> 5, [2, 6) and values above 10 -> 4, [-2, 2) -> 3,
 * [-6, -2) -> 2, [-10, -6) -> 1. Values below -10 are written unchanged.
 *
 * @throws Exception
 *             if a file cannot be read or written, or a rating is not numeric
 */
public static void convertJester() throws Exception
{
    String unformatted_file = "D:\\Dropbox\\Coding\\Java_Projects\\HappyCoding\\dataset\\Jester\\ratings_original.txt";
    String formatted_file = "C:\\Users\\guoguibing\\Desktop\\ratings.txt";
    // try-with-resources closes both streams even when a line fails to parse
    try (BufferedReader br = new BufferedReader(new FileReader(unformatted_file));
            BufferedWriter bw = new BufferedWriter(new FileWriter(formatted_file)))
    {
        String line;
        while ((line = br.readLine()) != null)
        {
            line = line.replace("\t", Dataset.REGMX);
            String[] data = line.split(Dataset.REGMX);
            double rating = Double.parseDouble(data[2]);
            if (rating >= 6 && rating <= 10) rating = 5.0;
            else if (rating >= 2) rating = 4.0;
            else if (rating >= -2) rating = 3.0;
            else if (rating >= -6) rating = 2.0;
            else if (rating >= -10) rating = 1.0;
            bw.write(data[0] + Dataset.REGMX + data[1] + Dataset.REGMX + rating + "\n");
        }
    }
}
/**
 * Copies a ratings file line by line, replacing every match of {@code regex}
 * with {@code Dataset.REGMX}.
 *
 * @param unformatted_file
 *            path of the file to read
 * @param formatted_file
 *            path of the file to write; overwritten if it exists
 * @param regex
 *            regular expression matching the source field separator
 * @throws Exception
 *             if a file cannot be read or written
 */
public static void convertMovieLensRating(String unformatted_file, String formatted_file, String regex)
        throws Exception
{
    // try-with-resources closes both streams even when copying fails midway
    try (BufferedReader br = new BufferedReader(new FileReader(unformatted_file));
            BufferedWriter bw = new BufferedWriter(new FileWriter(formatted_file)))
    {
        String line;
        while ((line = br.readLine()) != null)
        {
            bw.write(line.replaceAll(regex, Dataset.REGMX) + "\n");
        }
    }
}
/**
 * Copies a ratings file line by line, replacing every match of {@code regex}
 * with {@code Dataset.REGMX}.
 *
 * @param unformatted_file
 *            path of the file to read
 * @param formatted_file
 *            path of the file to write; overwritten if it exists
 * @param regex
 *            regular expression matching the source field separator
 * @throws Exception
 *             if a file cannot be read or written
 */
public static void convertJesterRating(String unformatted_file, String formatted_file, String regex)
        throws Exception
{
    // try-with-resources closes both streams even when copying fails midway
    try (BufferedReader br = new BufferedReader(new FileReader(unformatted_file));
            BufferedWriter bw = new BufferedWriter(new FileWriter(formatted_file)))
    {
        String line;
        while ((line = br.readLine()) != null)
        {
            bw.write(line.replaceAll(regex, Dataset.REGMX) + "\n");
        }
    }
}
/**
 * Converts the FilmTrust {@code .dat} files (fields separated by {@code ::})
 * into id-based rating and trust files.
 * <p>
 * Steps:
 * <ol>
 * <li>read {@code users.dat} and {@code items.dat} ({@code id::name}) into
 * name -> id maps;</li>
 * <li>rewrite {@code ratings.dat} ({@code user::item::rating}) as
 * {@code ratings.txt} with the names replaced by ids;</li>
 * <li>rewrite {@code trust.dat} ({@code trustor::trustee::rating}) as
 * {@code trust.txt}; a user not seen before is assigned the id
 * {@code usersMap.size() + 1};</li>
 * <li>dump the resulting user map to {@code users.dat2}.</li>
 * </ol>
 * NOTE(review): the id assigned to a new user can collide with an existing id
 * if the ids in users.dat are not exactly 1..n - confirm against the data.
 *
 * @throws Exception
 *             if a file cannot be read or written, a line is malformed, or a
 *             rating refers to an unknown user or item
 */
public static void convertFilmTrustRating() throws Exception
{
    String dir = "D:\\Dropbox\\Coding\\Java_Projects\\HappyCoding\\dataset\\FilmTrust\\";
    String line;

    Map<String, Integer> usersMap = new HashMap<>();
    try (BufferedReader br_users = new BufferedReader(new FileReader(dir + "users.dat")))
    {
        while ((line = br_users.readLine()) != null)
        {
            String[] data = line.split("::");
            Integer id = Integer.valueOf(data[0]);
            usersMap.put(data[1], id);
        }
    }

    Map<String, Integer> itemsMap = new HashMap<>();
    try (BufferedReader br_items = new BufferedReader(new FileReader(dir + "items.dat")))
    {
        while ((line = br_items.readLine()) != null)
        {
            String[] data = line.split("::");
            Integer id = Integer.valueOf(data[0]);
            itemsMap.put(data[1], id);
        }
    }

    try (BufferedReader br_ratings = new BufferedReader(new FileReader(dir + "ratings.dat"));
            BufferedWriter bw_ratings = new BufferedWriter(new FileWriter(dir + "ratings.txt")))
    {
        while ((line = br_ratings.readLine()) != null)
        {
            String[] data = line.split("::");
            String rating = data[2];
            // unboxing fails with a NullPointerException for unknown users/items
            int userId = usersMap.get(data[0]);
            int itemId = itemsMap.get(data[1]);
            bw_ratings.write(userId + Dataset.REGMX + itemId + Dataset.REGMX + rating + "\n");
        }
    }

    try (BufferedReader br_trust = new BufferedReader(new FileReader(dir + "trust.dat"));
            BufferedWriter bw_trust = new BufferedWriter(new FileWriter(dir + "trust.txt")))
    {
        while ((line = br_trust.readLine()) != null)
        {
            String[] data = line.split("::");
            String trustor = data[0];
            String trustee = data[1];
            String rating = data[2];

            // users that only appear in the trust file get a fresh id
            Integer trustorId = usersMap.get(trustor);
            if (trustorId == null)
            {
                trustorId = usersMap.keySet().size() + 1;
                usersMap.put(trustor, trustorId);
            }
            Integer trusteeId = usersMap.get(trustee);
            if (trusteeId == null)
            {
                trusteeId = usersMap.keySet().size() + 1;
                usersMap.put(trustee, trusteeId);
            }
            bw_trust.write(trustorId + Dataset.REGMX + trusteeId + Dataset.REGMX + rating + "\n");
        }
    }

    try (BufferedWriter bw_users = new BufferedWriter(new FileWriter(dir + "users.dat2")))
    {
        for (Entry<String, Integer> en : usersMap.entrySet())
        {
            String user = en.getKey();
            int id = en.getValue();
            bw_users.write(id + "::" + user + "\n");
        }
    }
}
/**
 * Regroups the per-item Netflix rating files into per-user files.
 * <p>
 * Every line of every file in the {@code ratings_items} directory is appended
 * to {@code ratings_users\ratings_<user>.txt}, where {@code <user>} is the
 * first field of the line (split with {@code Dataset.REGMX}) parsed as an
 * integer.
 *
 * @throws Exception
 *             if the source directory cannot be listed, a file cannot be read
 *             or written, or a user id is not an integer
 */
public static void convertItems() throws Exception
{
    String dirPath1 = "D:\\Data Sets\\Netflix\\netflix\\download\\ratings_items";
    String dirPath2 = "D:\\Data Sets\\Netflix\\netflix\\download\\ratings_users\\ratings_";
    File dir1 = new File(dirPath1);
    File[] files = dir1.listFiles();
    // listFiles() returns null when the path is not a directory or cannot be read
    if (files == null) throw new java.io.IOException("Cannot list directory: " + dirPath1);
    for (File file : files)
    {
        try (BufferedReader br = new BufferedReader(new FileReader(file)))
        {
            String line;
            while ((line = br.readLine()) != null)
            {
                String[] data = line.split(Dataset.REGMX);
                int user = Integer.parseInt(data[0]);
                // append mode: a user's ratings are spread over many item files
                try (FileWriter bw = new FileWriter(dirPath2 + user + ".txt", true))
                {
                    bw.write(line + "\n");
                }
            }
        }
    }
}
/**
 * Concatenates every file of the Netflix {@code 1000} directory into a single
 * rating file named {@code Dataset.RATING_SET} inside that same directory.
 * <p>
 * The destination is opened in append mode. Because it lives in the directory
 * being scanned, a destination left over from an earlier run is excluded from
 * the inputs; otherwise it would be read while it is being appended to.
 *
 * @throws Exception
 *             if the directory cannot be listed or a file cannot be read or
 *             written
 */
public static void combineNetflix() throws Exception
{
    String dirPath = "D:\\Data Sets\\Netflix\\netflix\\download\\1000";
    File dir = new File(dirPath);
    File[] files = dir.listFiles();
    // listFiles() returns null when the path is not a directory or cannot be read
    if (files == null) throw new java.io.IOException("Cannot list directory: " + dirPath);
    String dest = "D:\\Data Sets\\Netflix\\netflix\\download\\1000\\" + Dataset.RATING_SET;
    File destFile = new File(dest);
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(dest, true)))
    {
        for (File file : files)
        {
            // never feed the output file back into itself
            if (file.equals(destFile)) continue;
            try (BufferedReader br = new BufferedReader(new FileReader(file)))
            {
                String line;
                while ((line = br.readLine()) != null)
                    bw.write(line + "\n");
            }
        }
    }
}
/**
 * Converts the raw Netflix training set into one
 * {@code ratings_<item>.txt} file per item.
 * <p>
 * In the source files a line ending with {@code :} introduces an item id, and
 * the following lines are {@code user,rating,...}. Each rating line is
 * written as {@code user item rating}.
 *
 * @param args
 *            unused
 * @throws Exception
 *             if the directory cannot be listed, a file cannot be read or
 *             written, a number cannot be parsed, or a rating line occurs
 *             before any item header
 */
public static void convertNetflix(String[] args) throws Exception
{
    String dirPath = "D:\\Data Sets\\Netflix\\netflix\\download\\training_set";
    File dir = new File(dirPath);
    File[] files = dir.listFiles();
    // listFiles() returns null when the path is not a directory or cannot be read
    if (files == null) throw new java.io.IOException("Cannot list directory: " + dirPath);
    String dest = "D:\\Data Sets\\Netflix\\netflix\\download\\ratings\\ratings_";
    for (File file : files)
    {
        BufferedWriter bw = null;
        try (BufferedReader br = new BufferedReader(new FileReader(file)))
        {
            String line;
            int item = 0;
            while ((line = br.readLine()) != null)
            {
                if (line.endsWith(":"))
                {
                    item = Integer.parseInt(line.split(":")[0]);
                    // close (and thereby flush) the previous item's file before switching
                    if (bw != null) bw.close();
                    bw = new BufferedWriter(new FileWriter(dest + item + ".txt"));
                } else
                {
                    if (bw == null)
                        throw new IllegalStateException("Rating line before any item header in " + file);
                    String[] data = line.split(",");
                    int user = Integer.parseInt(data[0]);
                    int rating = Integer.parseInt(data[1]);
                    bw.write(user + " " + item + " " + rating + "\n");
                }
            }
        } finally
        {
            // bw stays null for a file without any item header
            if (bw != null) bw.close();
        }
    }
}
/**
 * Samples a fixed number of users from the rating data set and writes all of
 * their ratings to {@code <desktop>/<RATING_SET>.<users>}.
 * <p>
 * User positions are drawn with {@code Randoms.nextIntArray(users, 1, n, null)}
 * and matched against the iteration order of the loaded user map.
 * NOTE(review): the matching loop assumes the returned positions are sorted in
 * ascending order, and position 0 can never be drawn because the lower bound
 * is 1 - confirm against {@code Randoms.nextIntArray}.
 *
 * @param users
 *            the number of users to sample
 * @throws Exception
 *             if the data set cannot be loaded or the sample cannot be written
 */
public static void samplingDataset(int users) throws Exception
{
    ConfigParams.defaultInstance();
    String source = Dataset.DIRECTORY + Dataset.RATING_SET;
    String dest = Systems.getDesktop() + Dataset.RATING_SET + "." + users;
    Map<String, Map<String, Rating>> userRatingsMap = Dataset.loadRatingSet(source);
    Set<String> userSet = userRatingsMap.keySet();
    int[] indexArray = Randoms.nextIntArray(users, 1, userSet.size(), null);
    // try-with-resources closes the writer even when writing fails midway
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(dest))))
    {
        int count = 0;
        int index = 0;
        // nothing to match when no positions were drawn
        if (indexArray.length > 0)
        {
            for (String user : userSet)
            {
                if (count == indexArray[index])
                {
                    Map<String, Rating> rs = userRatingsMap.get(user);
                    for (Rating r : rs.values())
                    {
                        String msg = r.getUserId() + Dataset.REGMX + r.getItemId() + Dataset.REGMX + r.getRating()
                                + "\n";
                        bw.write(msg);
                    }
                    index++;
                    if (index >= indexArray.length) break;
                }
                count++;
            }
        }
        Logs.debug("index = " + index + " \t count = " + count);
    }
}
/**
 * Samples a rating subset restricted to randomly chosen items and to users who
 * rated at least five of those items, and writes it to
 * {@code <DIRECTORY>/Sample_<num_items>_items/<RATING_SET>}.
 * <p>
 * If more than {@code num_users} users qualify, {@code num_users} of them are
 * drawn at random; otherwise all qualifying users are kept. The target
 * directory must already exist.
 * NOTE(review): both random draws use a lower bound of 1, so the element at
 * list index 0 is presumably never selected - confirm against
 * {@code Randoms.nextIntArray}.
 *
 * @param num_users
 *            the maximum number of users in the sample
 * @param num_items
 *            the number of items to draw
 * @throws Exception
 *             if the data set cannot be loaded or the sample cannot be written
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static void samplingDatasetByItems(int num_users, int num_items) throws Exception
{
    String source = Dataset.DIRECTORY + Dataset.RATING_SET;
    String dirPath = Dataset.DIRECTORY + "Sample_" + num_items + "_items/";
    String dest = dirPath + Dataset.RATING_SET;

    /* Step 1: sample items-related user ratings */
    Map[] data = Dataset.loadTrainSet(source);
    Map<String, Map<String, Rating>> userRatingsMap = data[0];
    Map<String, Map<String, Rating>> itemRatingsMap = data[1];

    List<String> itemSet = new ArrayList<>(itemRatingsMap.keySet());
    int[] itemArray = Randoms.nextIntArray(num_items, 1, itemSet.size(), null);
    List<String> items = new ArrayList<>();
    for (int id : itemArray)
        items.add(itemSet.get(id));

    /* Step 2: keep users who rated at least 5 of the sampled items */
    List<String> users = new ArrayList<>();
    for (String user : userRatingsMap.keySet())
    {
        Map<String, Rating> itemRatings = userRatingsMap.get(user);
        int count = 0;
        for (String item : items)
        {
            if (itemRatings.containsKey(item)) count++;
            if (count >= 5)
            {
                users.add(user);
                break;
            }
        }
    }

    /* Step 3: cap the number of users */
    List<String> userSet;
    if (num_users < users.size())
    {
        userSet = new ArrayList<>();
        int[] userArray = Randoms.nextIntArray(num_users, 1, users.size(), null);
        for (int id : userArray)
            userSet.add(users.get(id));
    } else
    {
        userSet = users;
    }

    /* Step 4: write the ratings of the kept users on the sampled items */
    // try-with-resources closes the writer even when writing fails midway
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(dest))))
    {
        int count = 0;
        for (String user : userSet)
        {
            Map<String, Rating> itemRatings = userRatingsMap.get(user);
            for (String item : items)
            {
                Rating r = itemRatings.get(item);
                if (r != null)
                {
                    String msg = r.getUserId() + Dataset.REGMX + r.getItemId() + Dataset.REGMX + r.getRating()
                            + "\n";
                    bw.write(msg);
                    count++;
                }
            }
        }
        Logs.debug("Retrieved users: " + userSet.size() + ", items: " + items.size() + ", ratings: " + count);
        Logs.debug("Saved the rating sample to: " + dest);
    }
}
/**
 * Builds a fresh item-based sample (3000 users, 2000 items): recreates the
 * sample directory, samples the ratings, retrieves the matching trust data and
 * finally splits the sample into k folds.
 *
 * @throws Exception
 *             if any of the steps fails
 */
@Test
public void sampleByItems() throws Exception
{
    final int num_users = 3000;
    final int num_items = 2000;

    ConfigParams.defaultInstance();

    // start from an empty sample directory
    final String dirPath = Dataset.DIRECTORY + "Sample_" + num_items + "_items/";
    FileIO.deleteDirectory(dirPath);
    FileIO.makeDirectory(dirPath);

    samplingDatasetByItems(num_users, num_items);
    retrieveTrustData(dirPath, Dataset.DIRECTORY + Dataset.TRUST_SET);
    splitKFoldDataset(dirPath);

    Logs.debug("Data sampling is done!");
}
/**
 * Samples the ratings of 5000 users from the configured rating data set.
 *
 * @throws Exception
 *             if sampling fails
 */
@Test
public void sampleByUsers() throws Exception
{
    final int numUsers = 5000;
    samplingDataset(numUsers);
}
/**
 * Samples users who have more than {@code items} ratings and writes all of
 * their ratings to a fixed file on the desktop.
 * <p>
 * NOTE(review): this method treats the maps returned by
 * {@code Dataset.loadTrainSet} as {@code Map<Integer, List<Rating>>}, while
 * other callers in this class treat them as
 * {@code Map<String, Map<String, Rating>>}; if the latter is correct, this
 * method fails with a {@code ClassCastException} - confirm against
 * {@code Dataset}. The matching loop also assumes that the positions returned
 * by {@code Randoms.nextIntArray} are sorted in ascending order.
 *
 * @param users
 *            the number of users to sample
 * @param items
 *            a user qualifies only with more than this many ratings
 * @throws Exception
 *             if the data set cannot be loaded or the sample cannot be written
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static void samplingJester(int users, int items) throws Exception
{
    ConfigParams.defaultInstance();
    String source = Dataset.DIRECTORY + Dataset.RATING_SET;
    String dest = "C:/Users/guoguibing/Desktop/ratings.txt";
    Map[] maps = Dataset.loadTrainSet(source);
    Map<Integer, List<Rating>> userRatingsMap = maps[0];

    // keep only users with enough ratings
    Map<Integer, List<Rating>> data = new HashMap<>();
    for (Entry<Integer, List<Rating>> en : userRatingsMap.entrySet())
    {
        List<Rating> rs = en.getValue();
        if (rs.size() > items)
        {
            data.put(en.getKey(), rs);
        }
    }
    Set<Integer> userSet = data.keySet();
    int[] indexArray = Randoms.nextIntArray(users, 0, userSet.size(), null);

    // try-with-resources closes the writer even when writing fails midway
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(dest))))
    {
        int count = 0;
        int index = 0;
        // nothing to match when no positions were drawn
        if (indexArray.length > 0)
        {
            for (int user : userSet)
            {
                if (count == indexArray[index])
                {
                    List<Rating> rs = userRatingsMap.get(user);
                    for (Rating r : rs)
                    {
                        String msg = r.getUserId() + Dataset.REGMX + r.getItemId() + Dataset.REGMX + r.getRating()
                                + "\n";
                        bw.write(msg);
                    }
                    index++;
                    if (index >= indexArray.length) break;
                }
                count++;
            }
        }
        System.out.println("index = " + index + " \t count = " + count);
    }
}
/**
 * Copies the ratings of users whose numeric id is at most 1000 from the rating
 * data set into {@code <DIRECTORY>/Sample/<RATING_SET>}.
 * <p>
 * An existing destination file is deleted first. Lines are split on a single
 * space, the first field is the user id, and output is appended in batches.
 *
 * @throws Exception
 *             if a file cannot be read or written, or a user id is not an
 *             integer
 */
@Test
public void samplingNetflix() throws Exception
{
    ConfigParams.defaultInstance();
    String source = Dataset.DIRECTORY + Dataset.RATING_SET;
    String dest = Dataset.DIRECTORY + "Sample" + Systems.FILE_SEPARATOR + Dataset.RATING_SET;
    FileIO.deleteFile(dest);

    List<String> lines = new ArrayList<>(3000);
    int numUser = 1000;
    // try-with-resources closes the reader even when a line fails to parse
    try (BufferedReader br = new BufferedReader(new FileReader(new File(source))))
    {
        String line;
        while ((line = br.readLine()) != null)
        {
            String[] data = line.split(" ");
            int userId = Integer.parseInt(data[0]);
            if (userId > numUser) continue;
            lines.add(line);
            // flush in batches to bound memory usage
            if (lines.size() > 1024)
            {
                FileIO.writeList(dest, lines, null, true);
                lines.clear();
            }
        }
    }
    if (lines.size() > 0) FileIO.writeList(dest, lines, null, true);
}
/**
 * Samples a data set by keeping, for every user, a random subset of that
 * user's ratings, and writes the result to
 * {@code C:/Users/guoguibing/Desktop/<LABEL>/<mean>/<RATING_SET>}.
 * <p>
 * The number of ratings kept for a user with {@code size} ratings is
 * {@code round(mean * size / average)}, where {@code average} is the average
 * number of ratings per entry of the second map returned by
 * {@code Dataset.loadTrainSet}.
 * NOTE(review): that second map is named {@code itemRatingsMap} here, so the
 * scaling uses the per-item average although {@code mean} is documented per
 * user - confirm this is intended.
 *
 * @param src
 *            path of the rating file; {@code null} selects the configured
 *            default data set
 * @param mean
 *            desired number of ratings per user
 * @throws Exception
 *             if the data set cannot be loaded or the sample cannot be written
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static void samplingByMean(String src, int mean) throws Exception
{
    ConfigParams.defaultInstance();
    String source = src;
    if (source == null) source = Dataset.DIRECTORY + Dataset.RATING_SET;
    String dirStr = "C:/Users/guoguibing/Desktop/" + Dataset.LABEL + "/" + mean + "/";
    File dir = new File(dirStr);
    if (!dir.exists()) dir.mkdirs();

    Map[] maps = Dataset.loadTrainSet(source);
    Map<Integer, List<Rating>> userRatingsMap = maps[0];
    Map<Integer, List<Rating>> itemRatingsMap = maps[1];

    double sum = 0.0;
    for (Entry<Integer, List<Rating>> en : itemRatingsMap.entrySet())
    {
        sum += en.getValue().size();
    }
    double average = sum / itemRatingsMap.size();

    // try-with-resources closes the writer even when writing fails midway
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(dirStr + Dataset.RATING_SET))))
    {
        for (Entry<Integer, List<Rating>> en : userRatingsMap.entrySet())
        {
            List<Rating> rs = en.getValue();
            int size = rs.size();
            // scale the user's rating count and round to the nearest integer
            int save = (int) (mean * size / average + 0.5);
            int[] index = Randoms.nextIntArray(save, 0, size, null);
            for (int item : index)
            {
                Rating r = rs.get(item);
                String msg = r.getUserId() + Dataset.REGMX + r.getItemId() + Dataset.REGMX + r.getRating() + "\n";
                bw.write(msg);
            }
        }
    }
}
/**
 * Prints descriptive statistics of a rating data set to standard output.
 * <p>
 * Reported are the mean, standard deviation, maximum (and, for users, minimum)
 * of the number of ratings per user and per item, followed by the share of
 * ratings whose value truncates to each integer from -9 to 9. Values that
 * truncate to zero are reported separately for negative ("-0") and
 * non-negative ("0") ratings. The per-user statistics are padded with zeros up
 * to 63974 users and the per-item statistics up to 150 items.
 *
 * @param ratings_set
 *            path of the rating file
 * @throws Exception
 *             if the data set cannot be loaded
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static void checkRatingsSpecification(String ratings_set) throws Exception
{
    Map[] maps = Dataset.loadTrainSet(ratings_set);
    HashMap<Integer, List<Rating>> userRatingsMap = (HashMap<Integer, List<Rating>>) maps[0];

    // positive[v] counts ratings truncating to v (v = 0..9, 0 = non-negative zero);
    // negative[v] counts ratings truncating to -v (v = 0..9, 0 = negative zero)
    int[] positive = new int[10];
    int[] negative = new int[10];
    int count = 0;

    List<Double> sizes = new ArrayList<>();
    int missingUsers = 63974 - userRatingsMap.size();
    for (int i = 0; i < missingUsers; i++)
        sizes.add(0.0);

    for (List<Rating> ratings : userRatingsMap.values())
    {
        sizes.add((double) ratings.size());
        for (Rating r : ratings)
        {
            count++;
            int bucket = (int) (r.getRating());
            if (bucket >= 1 && bucket <= 9)
            {
                positive[bucket]++;
            } else if (bucket <= -1 && bucket >= -9)
            {
                negative[-bucket]++;
            } else if (bucket == 0)
            {
                if (r.getRating() < 0) negative[0]++;
                else positive[0]++;
            }
            // values outside [-9, 9] only contribute to the total
        }
    }

    double[] da = Lists.toArray(sizes);
    System.out.println("Mean ratings per user = " + Stats.mean(da) + ", std = " + Stats.sd(sizes) + ", max = "
            + Stats.max(da)[0] + ", min = " + Stats.min(da)[0]);

    HashMap<Integer, List<Rating>> itemRatingsMap = (HashMap<Integer, List<Rating>>) maps[1];
    List<Double> itemSizes = new ArrayList<>();
    double item0 = 150 - itemRatingsMap.size();
    for (int i = 0; i < item0; i++)
        itemSizes.add(0.0);
    for (List<Rating> rs : itemRatingsMap.values())
        itemSizes.add((double) rs.size());

    double[] dai = Lists.toArray(itemSizes);
    System.out.println("Mean ratings per item = " + Stats.mean(dai) + ", std = " + Stats.sd(itemSizes) + ", max = "
            + Stats.max(dai)[0]);

    // from -9 up to -0 ...
    for (int v = 9; v >= 0; v--)
    {
        System.out.println("Ratio(r=-" + v + ") = " + negative[v] + "/" + count + " = " + (negative[v] + 0.0)
                * 100.0 / count + "%");
    }
    // ... then from 0 up to 9
    for (int v = 0; v <= 9; v++)
    {
        System.out.println("Ratio(r=" + v + ") = " + positive[v] + "/" + count + " = " + (positive[v] + 0.0)
                * 100.0 / count + "%");
    }
}
/**
 * Prints descriptive statistics of a trust data set to standard output.
 * <p>
 * The first group of lines shows how many trustors have exactly 1..7 or more
 * than 7 trustees, relative to the number of trustors. The second group shows
 * how many trust links belong to those trustors, relative to the total number
 * of links. Then the mean, standard deviation and maximum of trustees per
 * trustor and of trustors per trustee are printed.
 * <p>
 * The maps come from {@link #loadTrustSet2(String)}, whose first two elements
 * are {@code Map<String, Map<String, Double>>}; they are read with that type.
 *
 * @param trust_set
 *            path of the trust file
 * @throws Exception
 *             if the trust file cannot be loaded
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static void checkTrustSpecification(String trust_set) throws Exception
{
    Map[] maps = loadTrustSet2(trust_set);
    Map<String, Map<String, Double>> userTrustees = maps[0];

    // index 1..7: exactly that many trustees; index 0: any other size (> 7)
    int[] trustorsBySize = new int[8];
    int[] linksBySize = new int[8];
    int count = userTrustees.size();
    int total = 0;

    List<Double> trusteeSizes = new ArrayList<>();
    for (Map<String, Double> trustees : userTrustees.values())
    {
        int size = trustees.size();
        total += size;
        trusteeSizes.add((double) size);
        int bucket = (size >= 1 && size <= 7) ? size : 0;
        trustorsBySize[bucket]++;
        linksBySize[bucket] += size;
    }

    System.out.println("Trustors No. = " + count);
    for (int t = 1; t <= 7; t++)
    {
        System.out.println("Ratio(t=" + t + ") = " + trustorsBySize[t] + "/" + count + " = "
                + (trustorsBySize[t] + 0.0) * 100.0 / count + "%");
    }
    System.out.println("Ratio(t>7) = " + trustorsBySize[0] + "/" + count + " = " + (trustorsBySize[0] + 0.0)
            * 100.0 / count + "%");
    System.out.println();
    for (int t = 1; t <= 7; t++)
    {
        System.out.println("Ratio(t=" + t + ") = " + linksBySize[t] + "/" + total + " = "
                + (linksBySize[t] + 0.0) * 100.0 / total + "%");
    }
    System.out.println("Ratio(t>7) = " + linksBySize[0] + "/" + total + " = " + (linksBySize[0] + 0.0) * 100.0
            / total + "%");

    double[] da = Lists.toArray(trusteeSizes);
    System.out.println("Mean trustees per trustor = " + Stats.mean(da) + ", std = " + Stats.sd(trusteeSizes)
            + ", max = " + Stats.max(da)[0]);

    List<Double> trustorSizes = new ArrayList<>();
    Map<String, Map<String, Double>> userTrustors = maps[1];
    for (Map<String, Double> trustors : userTrustors.values())
    {
        trustorSizes.add((double) trustors.size());
    }
    double[] dao = Lists.toArray(trustorSizes);
    System.out.println("Mean trustors per trustee = " + Stats.mean(dao) + ", std = " + Stats.sd(trustorSizes)
            + ", max = " + Stats.max(dao)[0]);
}
/**
 * Loads a rating file and indexes the ratings by user and by item.
 * <p>
 * Each non-blank line is split with {@code Dataset.REGMX} into
 * {@code userId, itemId, rating} and an optional fourth timestamp field
 * (0 when absent). Every parsed rating is also appended to {@code ratings} in
 * file order. If a (user, item) pair occurs more than once, the last rating
 * wins in the maps, while {@code ratings} keeps all of them.
 *
 * @param testSet
 *            path of the rating file
 * @param ratings
 *            output list that receives every parsed rating; must not be
 *            {@code null}
 * @return Map[]{userMap, itemMap}, i.e. {user -> {item -> rating}} and
 *         {item -> {user -> rating}}
 * @throws Exception
 *             if the file cannot be read or a line cannot be parsed
 */
@SuppressWarnings("rawtypes")
public static Map[] loadTestSet(String testSet, List<Rating> ratings) throws Exception
{
    Map<String, Map<String, Rating>> userMap = new HashMap<>();
    Map<String, Map<String, Rating>> itemMap = new HashMap<>();
    // try-with-resources closes the reader even when a line fails to parse
    try (BufferedReader fr = new BufferedReader(new FileReader(testSet)))
    {
        String line;
        while ((line = fr.readLine()) != null)
        {
            if (line.trim().isEmpty()) continue;
            String[] data = line.split(Dataset.REGMX);
            String userId = data[0];
            String itemId = data[1];
            double rating = Double.parseDouble(data[2]);
            Long timestamp = 0L;
            if (data.length > 3) timestamp = Long.parseLong(data[3]);

            Rating r = new Rating();
            r.setUserId(userId);
            r.setItemId(itemId);
            r.setRating(rating);
            r.setTimestamp(timestamp);
            ratings.add(r);

            Map<String, Rating> itemRatings = userMap.get(userId);
            if (itemRatings == null)
            {
                itemRatings = new HashMap<>();
                userMap.put(userId, itemRatings);
            }
            itemRatings.put(itemId, r);

            Map<String, Rating> userRatings = itemMap.get(itemId);
            if (userRatings == null)
            {
                userRatings = new HashMap<>();
                itemMap.put(itemId, userRatings);
            }
            userRatings.put(userId, r);
        }
    }
    return new Map[] { userMap, itemMap };
}
/**
 * Filters the Epinions trust file down to the links whose trustor and trustee
 * both occur as users in the configured rating data set, and writes the
 * surviving lines unchanged to {@code trust.txt} on the desktop.
 * <p>
 * NOTE(review): the user keys are looked up as {@code Integer}; other callers
 * in this class treat the keys returned by {@code Dataset.loadTrainSet} as
 * {@code String}. If the keys really are strings, no line will ever match -
 * confirm against {@code Dataset}.
 *
 * @throws Exception
 *             if the data set cannot be loaded, a file cannot be read or
 *             written, or an id is not an integer
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void splitFoldTrustSets() throws Exception
{
    ConfigParams.defaultInstance();
    String ratings_file = Dataset.DIRECTORY + Dataset.RATING_SET;
    String trust_file = "dataset\\Epinions\\trust.txt";
    Map[] ratingsMap = Dataset.loadTrainSet(ratings_file);
    Map<Integer, List<Rating>> userRatingsMap = ratingsMap[0];
    Set<Integer> users = userRatingsMap.keySet();
    // try-with-resources closes both streams even when a line fails to parse
    try (BufferedReader br = new BufferedReader(new FileReader(new File(trust_file)));
            BufferedWriter bw = new BufferedWriter(new FileWriter(new File(
                    "C:\\Users\\guoguibing\\Desktop\\trust.txt"))))
    {
        String line;
        while ((line = br.readLine()) != null)
        {
            String[] data = line.split(Dataset.REGMX);
            Integer trustor = Integer.valueOf(data[0]);
            Integer trustee = Integer.valueOf(data[1]);
            if (users.contains(trustor) && users.contains(trustee))
            {
                bw.write(line + "\n");
            }
        }
    }
}
/**
 * Splits a rating data set into 5 folds for cross validation.
 * <p>
 * The lines of {@code <dirPath>/<RATING_SET>} are partitioned into five
 * temporary files on the desktop ({@code k1}..{@code k5}). The first four
 * receive {@code total / 5} randomly drawn line numbers each, and the last
 * receives every line not drawn before. The temporary files are then combined
 * into {@code <dirPath>/5fold/u<i>.base} (four parts) and {@code u<i>.test}
 * (the remaining part), the trust file is copied next to them, and the
 * temporary files are deleted.
 * <p>
 * NOTE(review): the extraction loop assumes that {@code Randoms.nextIntArray}
 * returns line numbers in ascending order and skips the values passed as
 * exceptions - confirm against {@code Randoms}.
 *
 * @param dirPath
 *            directory that contains the rating and trust files; used as a
 *            string prefix, so it is expected to end with a separator
 * @throws Exception
 *             if the data set cannot be loaded or a file cannot be read or
 *             written
 */
// @Ignore
public static void splitKFoldDataset(String dirPath) throws Exception
{
    ConfigParams.defaultInstance();
    String ratings_file = dirPath + Dataset.RATING_SET;
    String tempDir = Systems.getDesktop();
    // loading the set is only needed for its side effect of setting Dataset.size
    Dataset.loadTrainSet(ratings_file);
    int totalAmount = Dataset.size;
    int kfold = 5;
    int size = totalAmount / kfold;
    // line numbers already drawn; passed back to the random generator
    int[] exceptions = new int[totalAmount];
    // exs[i] != 0 marks line i + 1 as already assigned to a fold
    int[] exs = new int[totalAmount];
    int count = 0;
    for (int k = 0; k < kfold; k++)
    {
        Logs.debug("Current step k = " + (k + 1));
        int[] indexArray = null;
        if (k < kfold - 1)
        {
            indexArray = Randoms.nextIntArray(size, 1, totalAmount + 1, exceptions);
            for (int index : indexArray)
            {
                exceptions[count++] = index;
                exs[index - 1] = index;
            }
            Arrays.sort(exceptions);
        } else if (k == kfold - 1)
        {
            // last fold: everything that has not been drawn so far
            count = 0;
            indexArray = new int[totalAmount - ((kfold - 1) * size)];
            for (int i = 0; i < exs.length; i++)
            {
                if (exs[i] == 0)
                {
                    indexArray[count++] = i + 1;
                }
            }
        }
        // try-with-resources closes both streams even when copying fails midway
        try (BufferedReader br = new BufferedReader(new FileReader(new File(ratings_file)));
                BufferedWriter bw = new BufferedWriter(new FileWriter(new File(tempDir + "k" + (k + 1)))))
        {
            String line = null;
            int cLine = 0;
            int iCount = 0;
            // single pass over the file, copying the lines whose number was selected
            while ((line = br.readLine()) != null)
            {
                cLine++;
                if (iCount >= indexArray.length) break;
                if (cLine == indexArray[iCount])
                {
                    iCount++;
                    bw.write(line + "\n");
                }
            }
        }
        Logs.debug("[Temporary] Write to " + tempDir + "k" + (k + 1));
    }
    /* combine into 5 folds */
    String dest = FileIO.makeDirPath(dirPath, kfold + "fold");
    // clean destination directory first
    FileIO.deleteDirectory(dest);
    FileIO.makeDirectory(dest);
    for (int i = 0; i < kfold; i++)
    {
        String base = dest + "u" + (i + 1) + ".base";
        String test = dest + "u" + (i + 1) + ".test";
        for (int j = 0; j < kfold; j++)
        {
            String source = tempDir + "k" + (j + 1);
            if (i == j) FileIO.copyFile(source, test);
            else FileIO.writeString(base, FileIO.readAsString(source), true);
        }
        Logs.debug("[Finish] Write to {}", test);
        Logs.debug("[Finish] Write to {}", base);
    }
    /* copy trust to k-fold directory */
    FileIO.copyFile(dirPath + Dataset.TRUST_SET, dest + Dataset.TRUST_SET);
    /* clean temporary data */
    for (int i = 0; i < kfold; i++)
    {
        String source = tempDir + "k" + (i + 1);
        FileIO.deleteFile(source);
    }
}
/**
 * Extracts from {@code trustPath} the trust statements whose trustor and
 * trustee are both keys of the rating map loaded from
 * {@code dirPath + Dataset.RATING_SET}, and writes them to
 * {@code dirPath + Dataset.TRUST_SET}. Empty lines in the trust file are
 * skipped.
 *
 * @param dirPath
 *            directory of the rating set; concatenated directly with the file
 *            names
 * @param trustPath
 *            path of the full trust file to filter
 * @throws Exception
 *             if loading the rating set fails, a non-empty trust line has
 *             fewer than two fields, or reading/writing fails
 */
public static void retrieveTrustData(String dirPath, String trustPath) throws Exception
{
    ConfigParams.defaultInstance();
    String ratingSet = dirPath + Dataset.RATING_SET;
    Map<String, Map<String, Rating>> userMap = Dataset.loadRatingSet(ratingSet);
    StringBuilder sb = new StringBuilder();
    // try-with-resources: the reader is closed even when a malformed line
    // raises an exception inside the loop
    try (BufferedReader br = new BufferedReader(new FileReader(trustPath)))
    {
        String line = null;
        while ((line = br.readLine()) != null)
        {
            if (line.isEmpty()) continue;
            String[] data = line.split(Dataset.REGMX);
            String trustor = data[0];
            String trustee = data[1];
            // keep the statement only if both users have ratings
            if (userMap.containsKey(trustor) && userMap.containsKey(trustee))
            {
                sb.append(line).append("\n");
            }
        }
    }
    String filePath = dirPath + Dataset.TRUST_SET;
    FileIO.writeString(filePath, sb.toString());
    Logs.debug("Saved the trust sample to: " + filePath);
}
/**
 * Separates the Book-Crossing rating file into two files, based on the third
 * space-separated field of each line parsed as an integer: lines with a value
 * greater than 0 go to {@code ratings-explicit.txt}, lines with a value equal
 * to 0 go to {@code ratings-implicit.txt}. The source file is read once per
 * output file. All paths are hard-coded.
 *
 * @throws Exception
 *             if reading, parsing or writing fails
 */
public static void convertBookCrossing() throws Exception
{
    String dirPath = "D:\\Dropbox\\PhD\\My Work\\Experiments\\Data Sets\\Recommender System Data Set\\Book-Crossing\\";
    String ratingSet = dirPath + "ratings-all.txt";
    String explicitSet = dirPath + "ratings-explicit.txt";
    String implicitSet = dirPath + "ratings-implicit.txt";

    // a converter yields null for lines it rejects
    // (presumably FileIO.readAsList leaves those out -- TODO confirm)
    Converter<String, String> explicitOnly = new Converter<String, String>() {
        @Override
        public String transform(String line)
        {
            String[] fields = line.split(" ");
            int value = Integer.parseInt(fields[2]);
            return value > 0 ? line : null;
        }
    };
    Converter<String, String> implicitOnly = new Converter<String, String>() {
        @Override
        public String transform(String line)
        {
            String[] fields = line.split(" ");
            int value = Integer.parseInt(fields[2]);
            return value == 0 ? line : null;
        }
    };

    List<String> explicitLines = FileIO.readAsList(ratingSet, explicitOnly);
    List<String> implicitLines = FileIO.readAsList(ratingSet, implicitOnly);

    FileIO.writeList(explicitSet, explicitLines);
    FileIO.writeList(implicitSet, implicitLines);
}
/**
 * Keeps the users of the configured trust set whose trust map holds more than
 * 5 entries and writes their trust statements, one
 * {@code user neighbour value} triple per line, to
 * {@code Systems.getDesktop() + Dataset.TRUST_SET}.
 *
 * @throws Exception
 *             if loading the trust set or writing the result fails
 */
@Test
public void resampleTrust() throws Exception
{
    ConfigParams.defaultInstance();
    String trustSet = Dataset.DIRECTORY + Dataset.TRUST_SET;
    Map<String, Map<String, Double>> allTrust = DatasetUtils.loadTrustSet(trustSet);

    final int threshold = 5;
    Map<String, Map<String, Double>> kept = new HashMap<>();
    for (Entry<String, Map<String, Double>> userEntry : allTrust.entrySet())
    {
        Map<String, Double> neighbours = userEntry.getValue();
        // skip users without a map or with at most `threshold` entries
        if (neighbours == null || neighbours.size() <= threshold) continue;
        kept.put(userEntry.getKey(), neighbours);
    }

    // print out
    String target = Systems.getDesktop() + Dataset.TRUST_SET;
    MapWriter<String, Map<String, Double>> tripleWriter = new MapWriter<String, Map<String, Double>>() {
        @Override
        public String processEntry(String key, Map<String, Double> val)
        {
            // lines are separated by "\n" with no trailing newline after the last one
            StringBuilder out = new StringBuilder();
            boolean first = true;
            for (Entry<String, Double> neighbour : val.entrySet())
            {
                if (!first) out.append("\n");
                first = false;
                out.append(key).append(" ").append(neighbour.getKey()).append(" ").append(neighbour.getValue());
            }
            return out.toString();
        }
    };
    FileIO.writeMap(target, kept, tripleWriter, false);
}
/**
 * Restricts the configured rating set to the users that appear as keys in the
 * {@code Resample-5} trust set and writes their ratings, one
 * {@code user item rating} triple per line, to the {@code Resample-5} rating
 * file. Only user id, item id and rating value are emitted.
 *
 * @throws Exception
 *             if loading either data set or writing the result fails
 */
@SuppressWarnings("unchecked")
@Test
public void resampleRatings() throws Exception
{
    ConfigParams.defaultInstance();
    String trustSet = Dataset.DIRECTORY + "\\Resample-5\\" + Dataset.TRUST_SET;
    String ratingSet = Dataset.DIRECTORY + Dataset.RATING_SET;
    Map<String, Map<String, Double>> trustByUser = DatasetUtils.loadTrustSet(trustSet);
    Map<String, Map<String, Rating>> ratingsByUser = Dataset.loadTrainSet(ratingSet)[0];

    Map<String, Map<String, Rating>> selected = new HashMap<>();
    for (String user : trustByUser.keySet())
    {
        // users without any rating entry are left out
        if (!ratingsByUser.containsKey(user)) continue;
        selected.put(user, ratingsByUser.get(user));
    }

    // print out
    String target = Dataset.DIRECTORY + "\\Resample-5\\" + Dataset.RATING_SET;
    MapWriter<String, Map<String, Rating>> tripleWriter = new MapWriter<String, Map<String, Rating>>() {
        @Override
        public String processEntry(String key, Map<String, Rating> val)
        {
            // lines are separated by "\n" with no trailing newline after the last one
            StringBuilder out = new StringBuilder();
            boolean first = true;
            for (Rating rating : val.values())
            {
                if (!first) out.append("\n");
                first = false;
                out.append(rating.getUserId() + " " + rating.getItemId() + " " + rating.getRating());
            }
            return out.toString();
        }
    };
    FileIO.writeMap(target, selected, tripleWriter, false);
}
}