package file.preprocessing;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import common.Bookmark;
import common.Utilities;
import file.BookmarkReader;
import file.postprocessing.CatDescFiltering;
/**
 * Splits the bookmarks of a data file into a train and a test part and writes
 * each part as a tab-separated text file below {@code ./data/csv/}.
 *
 * <p>All state is static; {@link #entries} is reset at the start of every
 * {@link #writeFiles} call, so overlapping calls would interfere with each
 * other.
 */
public class TensorProcessor {

    /**
     * Keys of the form {@code userID_resourceID} for every bookmark written so
     * far in the current {@link #writeFiles} run. The set is shared between the
     * train and the test file, so a user/resource pair is written at most once
     * across both files.
     */
    private static Set<String> entries;

    /**
     * Reads {@code filename} via a {@link BookmarkReader}, takes the first
     * {@code trainSize} bookmarks as the train set and the following
     * {@code testSize} bookmarks as the test set, and writes both to
     * {@code ./data/csv/<prefix>_train.txt} and
     * {@code ./data/csv/<prefix>[_desc|_cat]_test.txt}, where {@code <prefix>}
     * is the part of {@code filename} before its first underscore.
     *
     * <p>NOTE(review): both files are currently written with a {@code null}
     * reader, so {@code minBookmarks}/{@code maxBookmarks} have no effect from
     * this entry point (see the reader check in {@code createFile}).
     *
     * @param filename     name handed to {@link BookmarkReader#readFile}; its
     *                     prefix up to the first {@code '_'} names the output files
     * @param trainSize    number of leading bookmarks that form the train set
     * @param testSize     number of bookmarks after the train set that form the test set
     * @param tagRec       {@code true} to write one {@code user\tresource\ttag} line per
     *                     tag; {@code false} to write one {@code user\tresource[\trating]}
     *                     line per bookmark
     * @param minBookmarks lower bound forwarded to the evaluation check (may be unused, see note)
     * @param maxBookmarks upper bound forwarded to the evaluation check (may be unused, see note)
     * @param filter       optional user filter applied to the test set only; may be {@code null}.
     *                     When present it also adds a {@code _desc} or {@code _cat} suffix to
     *                     the test file name
     */
    public static void writeFiles(String filename, int trainSize, int testSize, boolean tagRec, Integer minBookmarks, Integer maxBookmarks, CatDescFiltering filter) {
        entries = new LinkedHashSet<String>();

        BookmarkReader reader = new BookmarkReader(trainSize, false);
        reader.readFile(filename);
        List<Bookmark> bookmarks = reader.getBookmarks();
        List<Bookmark> trainList = bookmarks.subList(0, trainSize);
        List<Bookmark> testList = bookmarks.subList(trainSize, trainSize + testSize);

        // Placeholder for a format-specific infix (formerly "_tensor" / "_mymedialite").
        String name = "";
        String outputFilename = filename.split("_")[0];

        // train file
        // TODO: pass the reader once resource mapping / evaluation filtering is wanted here
        createFile(trainList, "./data/csv/" + outputFilename + "_train" + name + ".txt", null, false, tagRec, minBookmarks, maxBookmarks, null);

        // test file
        String suffix = "";
        if (filter != null) {
            suffix += ("_" + (filter.getDescriber() ? "desc" : "cat"));
        }
        createFile(testList, "./data/csv/" + outputFilename + suffix + "_test" + name + ".txt", null, true, tagRec, minBookmarks, maxBookmarks, filter);
    }

    /**
     * Writes the given bookmarks to {@code filename}, skipping filtered users and
     * any user/resource pair that is already recorded in {@link #entries}.
     *
     * <p>The writer is managed by try-with-resources so the file handle is
     * released (and buffered output flushed) even when writing or one of the
     * bookmark accessors throws. An {@link IOException} is reported via
     * {@code printStackTrace()} and otherwise swallowed, as before.
     *
     * @param list         bookmarks to write, in order
     * @param filename     path of the output file (overwritten if it exists)
     * @param reader       optional reader; when non-null it enables the evaluation check for
     *                     test sets and is used to map resource ids in the non-tag format
     * @param testset      {@code true} if {@code list} is the test set
     * @param tagRec       {@code true} for {@code user\tresource\ttag} lines, otherwise
     *                     {@code user\tresource[\trating]} lines
     * @param minBookmarks lower bound passed to {@link Utilities#isEntityEvaluated}
     * @param maxBookmarks upper bound passed to {@link Utilities#isEntityEvaluated}
     * @param filter       optional user filter; users it rejects are skipped
     */
    private static void createFile(List<Bookmark> list, String filename, BookmarkReader reader, boolean testset, boolean tagRec, Integer minBookmarks, Integer maxBookmarks, CatDescFiltering filter) {
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filename)))) {
            for (Bookmark data : list) {
                if (testset && reader != null) { // means test-set
                    // TODO: check for resource
                    if (!Utilities.isEntityEvaluated(reader, data.getUserID(), minBookmarks, maxBookmarks, false)) {
                        continue; // skip this user if it shouldn't be evaluated
                    }
                }
                if (filter != null) { // also for test-set
                    if (!filter.evaluate(data.getUserID())) {
                        continue;
                    }
                }

                String key = data.getUserID() + "_" + data.getResourceID();
                if (entries.contains(key)) {
                    continue; // pair already written (possibly to the other file)
                }

                if (tagRec) {
                    for (int tag : data.getTags()) {
                        bw.write(data.getUserID() + "\t" + data.getResourceID() + "\t" + tag + "\n");
                    }
                } else {
                    // A rating of -2 produces no rating column (presumably the "no rating" sentinel).
                    String ratingString = "";
                    if (data.getRating() != -2) {
                        ratingString = "\t" + (int)data.getRating();
                    }
                    bw.write(data.getUserID() + "\t" + (reader == null ? data.getResourceID() : reader.getResources().get(data.getResourceID())) + ratingString + "\n");
                }
                // Record the pair only after it was written successfully.
                entries.add(key);
            }
            // No explicit flush needed: closing the BufferedWriter flushes it.
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}