package focusedCrawler.tools;
import java.io.File;
import java.io.IOException;
import java.net.URLDecoder;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.cbor.CBORFactory;
import focusedCrawler.util.LinkFilter;
/**
 * Command-line tool that scans a directory of crawled files and moves every
 * file whose URL is rejected by a {@link LinkFilter} into a separate
 * "negative" directory.
 *
 * <p>The URL of each file is recovered from its file name: everything before
 * the last {@code '_'} is URL-decoded as UTF-8 and handed to the filter.
 */
public class CborDataReclassifier {

    // Not used by main(); kept because they are package-visible and may be
    // referenced from elsewhere in the package.
    static final ObjectMapper cborMapper = new ObjectMapper(new CBORFactory());
    static final ObjectMapper jsonMapper = new ObjectMapper();

    /** A progress line is printed each time this many files have been moved. */
    private static final int PROGRESS_INTERVAL = 1000;

    /**
     * Entry point.
     *
     * @param args {@code args[0]} input directory to scan;
     *             {@code args[1]} directory that receives rejected files
     *             (created if it does not exist);
     *             {@code args[2]} path string passed to the {@link LinkFilter} constructor
     * @throws IOException if the input directory cannot be listed or the
     *                     negative directory cannot be created
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 3) {
            System.err.println("Usage: CborDataReclassifier"
                    + " <inputDirectory> <negativeDirectory> <linkFilterPath>");
            System.exit(1);
        }

        Path inputLocation = Paths.get(args[0]);
        Path negativeDirectory = Paths.get(args[1]);
        LinkFilter linkfilter = new LinkFilter(Paths.get(args[2]).toString());

        // Without the target directory every single move would fail.
        if (!Files.isDirectory(negativeDirectory)) {
            Files.createDirectories(negativeDirectory);
        }

        int filesMoved = 0;
        int filesTotal = 0;

        // try-with-resources: a DirectoryStream holds an open directory handle
        // until it is closed.
        try (DirectoryStream<Path> fileStream = Files.newDirectoryStream(inputLocation)) {
            for (Path p : fileStream) {
                // Count the file up front so that progress output includes the
                // file currently being processed (and never divides by zero).
                filesTotal++;

                // The URL is taken from the file name instead of deserializing
                // the file content (an earlier variant read the "url" field of
                // the stored TargetModel through cborMapper).
                String url = urlFromFileName(p.getFileName().toString());
                if (url == null) {
                    System.err.println("Skipping file with unexpected name: " + p);
                    continue;
                }

                if (linkfilter.accept(url)) {
                    continue;
                }

                Path newPath = negativeDirectory.resolve(p.getFileName());
                try {
                    // No REPLACE_EXISTING: an already existing target is
                    // reported as a failure rather than silently overwritten.
                    Files.move(p, newPath);
                } catch (IOException e) {
                    System.err.println("Failed to move " + p + " to " + newPath + ": " + e);
                    continue;
                }

                filesMoved++;
                if (filesMoved % PROGRESS_INTERVAL == 0) {
                    // Printed as a fraction in [0, 1], not multiplied by 100.
                    double percent = filesMoved / (filesTotal + 0d);
                    System.out.println(filesMoved + " out of " + filesTotal + " files moved." +
                            " (" + percent + ")");
                }
            }
        }
    }

    /**
     * Recovers the URL encoded in a file name.
     *
     * @param fileName file name of the form {@code <url-encoded URL>_<suffix>}
     * @return the decoded URL, or {@code null} if the name contains no
     *         {@code '_'} or its prefix is not valid URL encoding
     * @throws IOException if the UTF-8 charset is reported as unsupported
     */
    private static String urlFromFileName(String fileName) throws IOException {
        int separator = fileName.lastIndexOf('_');
        if (separator < 0) {
            return null;
        }
        try {
            return URLDecoder.decode(fileName.substring(0, separator), "UTF-8");
        } catch (IllegalArgumentException e) {
            // Malformed percent-escape in the file name.
            return null;
        }
    }
}