package file.preprocessing;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// Used for the LastFM and Delicious (small) datasets.
// Input columns (tab-separated): userID bookmarkID tagID timestamp
/**
 * Converts a tab-separated tag-assignment file into a semicolon-separated,
 * quoted text file with one line per (user, resource) group.
 *
 * <p>Each input line is expected to hold four tab-separated columns:
 * user, resource (bookmark), tag ID and timestamp. Consecutive lines that share
 * the same user and resource are merged into one output line whose tag column
 * lists the tag names (looked up in {@code tags.dat}) separated by commas.
 *
 * <p>All files are read from and written to {@code ./data/csv/lastfm_core/}.
 */
public class LastFMProcessor {

    /** Directory holding the input file, the tag dictionary and the generated output. */
    private static final String DATA_DIR = "./data/csv/lastfm_core/";

    /** Tab-separated dictionary file: column 0 is the tag ID, column 1 the tag name. */
    private static final String TAGS_FILE = "tags.dat";

    /** Minimum number of tab-separated columns a data line must have to be processed. */
    private static final int REQUIRED_COLUMNS = 4;

    /**
     * Reads {@code inputFile} and writes the grouped result to {@code outputFile + ".txt"}.
     *
     * <p>The first line of the input is treated as a header and skipped. Lines with fewer
     * than four columns (for example a trailing blank line) are ignored. Tag IDs that have
     * no entry in the tag dictionary are dropped from the tag list. An input without any
     * data line produces an empty output file.
     *
     * @param inputFile  name of the input file, relative to the data directory
     * @param outputFile name of the output file without the {@code .txt} suffix,
     *                   relative to the data directory
     * @return {@code true} if the whole file was converted; {@code false} if reading,
     *         writing or timestamp parsing failed (the stack trace is printed)
     */
    public static boolean processFile(String inputFile, String outputFile) {
        // Load the dictionary once instead of re-scanning the file for every input line.
        Map<String, String> tagNames = loadNames(TAGS_FILE);

        // try-with-resources closes (and thereby flushes) both streams on every path.
        // The reader is opened first, so a missing input does not create an output file.
        try (BufferedReader br = new BufferedReader(new FileReader(new File(DATA_DIR + inputFile)));
             BufferedWriter bw = new BufferedWriter(
                     new FileWriter(new File(DATA_DIR + outputFile + ".txt")))) {
            String resID = "";
            String userHash = "";
            String timestamp = "";
            boolean hasPending = false; // true once at least one data line was consumed
            List<String> tags = new ArrayList<>();

            br.readLine(); // skip header line
            String line;
            while ((line = br.readLine()) != null) {
                String[] lineParts = line.split("\t");
                if (lineParts.length < REQUIRED_COLUMNS) {
                    continue; // blank or malformed line
                }
                boolean sameGroup = resID.equals(lineParts[1]) && userHash.equals(lineParts[0]);
                if (hasPending && !sameGroup) {
                    // A new (user, resource) pair starts: emit the finished group.
                    writeLine(bw, resID, userHash, timestamp, tags);
                    tags.clear();
                }
                userHash = lineParts[0];
                resID = lineParts[1];
                timestamp = lineParts[3]; // the last timestamp seen in a group is the one written
                hasPending = true;

                String tagName = tagNames.get(lineParts[2]);
                if (tagName != null) {
                    tags.add(tagName);
                }
            }
            if (hasPending) {
                writeLine(bw, resID, userHash, timestamp, tags);
            }
            bw.flush();
            return true;
        } catch (IOException | NumberFormatException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Writes one output record in the form
     * {@code "user";"resource";"timestamp";"tag1,tag2";""} followed by a newline.
     *
     * @throws IOException           if the record cannot be written
     * @throws NumberFormatException if {@code timestamp} is not a valid long
     */
    private static void writeLine(BufferedWriter bw, String resID, String userHash,
            String timestamp, List<String> tags) throws IOException {
        StringBuilder tagString = new StringBuilder();
        for (String tag : tags) {
            if (tagString.length() > 0) {
                tagString.append(',');
            }
            tagString.append(tag);
        }
        bw.write("\"" + userHash + "\";\"" + resID + "\";\"" + processTimestamp(timestamp)
                + "\";\"" + tagString + "\";\"\"\n");
    }

    /**
     * Parses the timestamp and divides it by 1000
     * (presumably milliseconds to seconds; confirm against the dataset).
     */
    private static long processTimestamp(String timestamp) {
        return Long.parseLong(timestamp) / 1000;
    }

    /**
     * Loads a tab-separated ID-to-name dictionary from the data directory.
     *
     * <p>Lines with fewer than two columns are ignored; when an ID occurs more than once
     * the first occurrence wins. If the file cannot be read, the stack trace is printed
     * and the entries read so far (possibly none) are returned, so lookups simply miss.
     *
     * @param file name of the dictionary file, relative to the data directory
     * @return map from ID (column 0) to name (column 1); never {@code null}
     */
    private static Map<String, String> loadNames(String file) {
        Map<String, String> names = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(DATA_DIR + file)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] lineParts = line.split("\t");
                if (lineParts.length >= 2 && !names.containsKey(lineParts[0])) {
                    names.put(lineParts[0], lineParts[1]);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return names;
    }
}