package librec.undefined; import happy.coding.io.FileIO; import happy.coding.io.Logs; import happy.coding.math.Randoms; import happy.coding.system.Debug; import happy.coding.system.Systems; import java.io.BufferedReader; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import librec.data.DataConvertor; import librec.data.DataDAO; import librec.data.DenseMatrix; import librec.data.DenseVector; import librec.data.MatrixEntry; import librec.data.SparseMatrix; import librec.data.SparseVector; import librec.data.SymmMatrix; import org.junit.Test; import com.google.common.collect.BiMap; import com.google.common.collect.HashBasedTable; import com.google.common.collect.Table; public class UnitTests { @Test public void testSerialization() throws Exception { String filePath = Systems.getDesktop() + "vec.dat"; DenseVector vec = new DenseVector(11); for (int i = 10, j = 0; i >= 0; i--, j++) vec.set(j, i); FileIO.serialize(vec, filePath); DenseVector v2 = (DenseVector) FileIO.deserialize(filePath); Logs.debug(v2.toString()); DenseMatrix mat = new DenseMatrix(3, 4); for (int i = 0; i < 3; i++) for (int j = 0; j < 4; j++) mat.set(i, j, i + j); Logs.debug(mat); String matPath = Systems.getDesktop() + "mat.dat"; FileIO.serialize(mat, matPath); DenseMatrix mat2 = (DenseMatrix) FileIO.deserialize(matPath); Logs.debug(mat2); } @Test public void testCvtFirstLines() throws Exception { String dirPath = "D:\\Research\\Datasets\\KDD Cup 2011\\Yahoo! Music Dataset\\track2\\"; String sourcePath = dirPath + "trainIdx2.firstLines.txt"; String targetPath = dirPath + "train.txt"; // train data set DataConvertor dc = new DataConvertor(sourcePath, targetPath); dc.cvtFirstLines("\\|", "\t"); // test data set dc.setSourcePath(dirPath + "testIdx2.firstLines.txt"); dc.setTargetPath(dirPath + "test.txt"); dc.cvtFirstLines("\\|", "\t"); } @Test public void testSparseMatrix() throws Exception { Table<Integer, Integer, Double> vals = HashBasedTable.create(); vals.put(0, 0, 10.0); vals.put(0, 4, -2.0); vals.put(1, 0, 3.0); vals.put(1, 1, 9.0); vals.put(1, 5, 3.0); vals.put(2, 1, 7.0); vals.put(2, 2, 8.0); vals.put(2, 3, 7.0); vals.put(3, 0, 3.0); vals.put(3, 2, 8.0); vals.put(3, 3, 7.0); vals.put(3, 4, 5.0); vals.put(4, 1, 8.0); vals.put(4, 3, 9.0); vals.put(4, 4, 9.0); vals.put(4, 5, 13.0); vals.put(5, 1, 4.0); vals.put(5, 4, 2.0); vals.put(5, 5, -1.0); SparseMatrix A = new SparseMatrix(6, 6, vals); Logs.debug(A); String dirPath = FileIO.desktop; FileIO.serialize(A, dirPath + "A.mat"); SparseMatrix A2 = (SparseMatrix) FileIO.deserialize(dirPath + "A.mat"); Logs.debug(A2); SparseVector v = new SparseVector(10); v.set(2, 5); v.set(9, 10); Logs.debug(v); FileIO.serialize(v, dirPath + "v.vec"); SparseVector v2 = (SparseVector) FileIO.deserialize(dirPath + "v.vec"); Logs.debug(v2); SymmMatrix mm = new SymmMatrix(5); mm.set(0, 1, 0.5); mm.set(2, 3, 0.3); mm.set(4, 2, 0.8); Logs.debug(mm); FileIO.serialize(mm, dirPath + "mm.mat"); SymmMatrix mm2 = (SymmMatrix) FileIO.deserialize(dirPath + "mm.mat"); Logs.debug(mm2); } public static void main(String[] args) throws Exception { String dirPath = "D:\\Java\\Datasets\\BookCrossing\\"; String path = dirPath + "ratings.txt"; DataDAO dao = new DataDAO(path); // dao.readData(new int[] { 0, 1 }, false); if (Debug.OFF) { dao.printSpecs(); } else if (Debug.ON) { dao.printDistr(true); } else { BiMap<String, Integer> userIds = dao.getUserIds(); String dataPath = dirPath + "review-ratings.txt"; BufferedReader br = FileIO.getReader(dataPath); String line = null; while ((line = br.readLine()) != null) { String[] data = line.split("[ \t,]"); String trustor = data[0]; String trustee = data[1]; if (!userIds.containsKey(trustor)) userIds.put(trustor, userIds.size()); if (!userIds.containsKey(trustee)) userIds.put(trustee, userIds.size()); } br.close(); if (Debug.OFF) { br = FileIO.getReader(dirPath + "user-reviews.txt"); line = null; while ((line = br.readLine()) != null) { String[] data = line.split("[ \t,]"); String user = data[0]; if (!userIds.containsKey(user)) userIds.put(user, userIds.size()); } br.close(); } Logs.debug("Total users: {}", userIds.size()); } } /** * Randomly sample 100K ratings */ @Test public void sample100K() throws Exception { String dir = "D:\\Java\\Datasets\\Epinions\\"; String ratingDir = dir + "ratings.txt"; String trustDir = dir + "trust.txt"; // probability = 100_000/664_824=0.15 double prob = 100_000 / (664_824 + 0.0); String dirPath = dir + "100K\\"; FileIO.makeDirectory(dirPath); FileIO.deleteFile(dirPath + "ratings.txt"); FileIO.deleteFile(dirPath + "trust.txt"); // rating data BufferedReader fr = FileIO.getReader(ratingDir); String line = null; List<String> lines = new ArrayList<>(1500); List<String> users = new ArrayList<>(); while ((line = fr.readLine()) != null) { double rand = Math.random(); if (rand < prob) { lines.add(line); String user = line.split("[ \t,]")[0]; if (!users.contains(user)) users.add(user); if (lines.size() >= 1000) { FileIO.writeList(dirPath + "ratings.txt", lines, true); lines.clear(); } } } fr.close(); if (lines.size() > 0) { FileIO.writeList(dirPath + "ratings.txt", lines, true); lines.clear(); } Logs.debug("Finish ratings!"); // trust data fr = FileIO.getReader(trustDir); line = null; while ((line = fr.readLine()) != null) { String[] data = line.split("[ \t,]"); String tor = data[0]; String tee = data[1]; if (users.contains(tor) && users.contains(tee)) { lines.add(line); if (lines.size() >= 1000) { FileIO.writeList(dirPath + "trust.txt", lines, true); lines.clear(); } } } fr.close(); if (lines.size() > 0) { FileIO.writeList(dirPath + "ratings.txt", lines, true); lines.clear(); } Logs.debug("Finish trust and All!"); } @Test public void testSample() throws Exception { String dir = "D:\\Dropbox\\PhD\\My Work\\Experiments\\Datasets\\Ratings\\Epinions\\Extended Epinions dataset\\"; String dirDest = dir + "Distrust v1\\"; // read trust data to get all users Set<Long> users = new HashSet<>(); BufferedReader br = FileIO.getReader(dirDest + "trust.txt"); String line = null; while ((line = br.readLine()) != null) { String[] data = line.split("\t"); long trustor = Long.parseLong(data[0]); long trustee = Long.parseLong(data[1]); users.add(trustor); users.add(trustee); } br.close(); // retrieve ratings given by the above users; br = FileIO.getReader(dir + "rating.txt"); line = null; List<String> lines = new ArrayList<>(1500); String file = dirDest + "ratings.txt"; FileIO.deleteFile(file); while ((line = br.readLine()) != null) { String[] data = line.split("[ \t,]"); String item = data[0]; long user = Long.parseLong(data[1]); String rate = data[2]; if (users.contains(user)) { lines.add(user + "\t" + item + "\t" + rate); if (lines.size() >= 1000) { FileIO.writeList(file, lines, true); lines.clear(); } } } if (lines.size() > 0) FileIO.writeList(file, lines, true); Logs.debug("Done!"); } @Test public void testSample2() throws Exception { String dir = "D:\\Dropbox\\PhD\\My Work\\Experiments\\Datasets\\Ratings\\Epinions\\Extended Epinions dataset\\"; String dirDest = dir + "Distrust_v8\\"; int userAmount = 4_000; FileIO.makeDirectory(dirDest); // read ratings DataDAO rateDao = new DataDAO(dir + "rating.txt"); SparseMatrix rateMatrix = rateDao.readData(new int[] { 1, 0, 2 }, false, -1); BiMap<String, Integer> ids = rateDao.getUserIds(); for (MatrixEntry me : rateMatrix) { double rate = me.get(); if (rate > 5) me.set(5.0); } // read trust data to get all users; who have both ratings and trust Set<Long> allUsers = new HashSet<>(); BufferedReader br = FileIO.getReader(dir + "user_rating.txt"); String line = null; while ((line = br.readLine()) != null) { String[] data = line.split("\t"); long trustor = Long.parseLong(data[0]); long trustee = Long.parseLong(data[1]); if (ids.containsKey(data[0]) && ids.containsKey(data[1])) { allUsers.add(trustor); allUsers.add(trustee); } } br.close(); // sample users from all candidate users; List<Long> users = new ArrayList<>(allUsers); List<Long> sample = new ArrayList<>(); for (int idx : Randoms.randInts(userAmount, 0, users.size())) sample.add(users.get(idx)); // retrieve trusts containing trustors, trustees in sample br = FileIO.getReader(dir + "user_rating.txt"); line = null; List<String> lines = new ArrayList<>(1500); String file = dirDest + "trust.txt"; FileIO.deleteFile(file); while ((line = br.readLine()) != null) { String[] data = line.split("[ \t,]"); long trustor = Long.parseLong(data[0]); long trustee = Long.parseLong(data[1]); String rate = data[2]; if (sample.contains(trustor) && sample.contains(trustee)) { lines.add(trustor + "\t" + trustee + "\t" + rate); if (lines.size() >= 1000) { FileIO.writeList(file, lines, true); lines.clear(); } } } if (lines.size() > 0) FileIO.writeList(file, lines, true); // retrieve ratings given by the above users; br = FileIO.getReader(dir + "rating.txt"); line = null; lines.clear(); file = dirDest + "ratings.txt"; FileIO.deleteFile(file); while ((line = br.readLine()) != null) { String[] data = line.split("[ \t,]"); String item = data[0]; long user = Long.parseLong(data[1]); double rate = Double.parseDouble(data[2]); rate = rate > 5 ? 5.0 : rate; if (sample.contains(user)) { lines.add(user + "\t" + item + "\t" + rate); if (lines.size() >= 1000) { FileIO.writeList(file, lines, true); lines.clear(); } } } if (lines.size() > 0) FileIO.writeList(file, lines, true); Logs.debug("Done!"); } @Test public void testSampleSet() throws Exception { String dir = "D:\\Dropbox\\PhD\\My Work\\Experiments\\Datasets\\Ratings\\Epinions\\Extended Epinions dataset\\" + "Distrust_v8\\"; // ratings DataDAO rateDao = new DataDAO(dir + "ratings-2.txt"); SparseMatrix rateMatrix = rateDao.readData(); Logs.debug("Total rate size = {}", rateMatrix.size()); if (Debug.OFF) { // remove items with less than 20 ratings int threshold = 5; List<Integer> badIds = new ArrayList<>(); for (int j = 0; j < rateMatrix.numColumns(); j++) { int size = rateMatrix.columnSize(j); if (size < threshold) badIds.add(j); } List<String> lines = new ArrayList<>(1500); String file = dir + "ratings-2.txt"; FileIO.deleteFile(file); for (MatrixEntry me : rateMatrix) { int user = me.row(); int item = me.column(); double rate = me.get(); if (rate != 0 && !badIds.contains(item)) { String line = rateDao.getUserId(user) + "\t" + rateDao.getItemId(item) + "\t" + rate; lines.add(line); if (lines.size() >= 1000) { FileIO.writeList(file, lines, true); lines.clear(); } } } if (lines.size() > 0) FileIO.writeList(file, lines, true); Logs.debug( "Resample a subset with items receiving at least {} ratings", threshold); } // trust DataDAO trustDao = new DataDAO(dir + "trust.txt", rateDao.getUserIds()); SparseMatrix trustMatrix = trustDao.readData(); Logs.debug("Total trust size = {}", trustMatrix.size()); // distrust amount int cntT = 0, cntDT = 0; for (MatrixEntry me : trustMatrix) { double t = me.get(); if (t == 1) cntT++; else if (t == -1) cntDT++; } Logs.debug("Trust = {}, Distrust = {}", cntT, cntDT); Logs.debug("Done!"); } }