package org.jcommons.test;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Fetches a Meta Stack Exchange question page, collects the elements whose text contains
 * "[PDF]" or "[arXiv]", and saves the target of every such element that carries an
 * {@code href} into a local folder. Each file is named after the first {@code <strong>}
 * element found under the link's parent element, with a {@code .pdf} extension.
 */
public class DownloaderTest {

    /** Page whose links are harvested. */
    private static final String PAGE_URL =
            "http://meta.stackexchange.com/questions/134495/academic-papers-using-stack-exchange-data";

    /** Folder used when none is supplied on the command line. */
    private static final String DEFAULT_FOLDER = "D:/dl";

    /** Characters that cannot appear in a file name on common file systems. */
    private static final String UNSAFE_FILE_NAME_CHARS = "\\/*<>|";

    /** Size of the copy buffer, in bytes. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Runs the download.
     *
     * @param args optional; when {@code args[0]} is present and non-blank it is used as the
     *             target folder instead of {@code D:/dl}
     * @throws IOException if the page cannot be fetched or the target folder cannot be created;
     *                     failures of individual downloads are reported on stderr and skipped
     */
    public static void main(String[] args) throws IOException {
        String folderName = DEFAULT_FOLDER;
        if (args != null && args.length > 0 && !args[0].trim().isEmpty()) {
            folderName = args[0];
        }
        File folder = new File(folderName);
        if (!folder.isDirectory() && !folder.mkdirs()) {
            throw new IOException("Cannot create download folder: " + folder);
        }

        Document doc = Jsoup.connect(PAGE_URL).get();
        Elements eles = doc.getElementsContainingText("[PDF]");
        eles.addAll(doc.getElementsContainingText("[arXiv]"));

        for (Element ele : eles) {
            // The text search also matches enclosing elements; only those with an href are links.
            String src = ele.attr("href");
            if (src == null || src.trim().isEmpty()) {
                continue;
            }
            String fileName = buildFileName(ele);
            if (fileName == null) {
                System.err.println("Skipping " + src + ": no <strong> title next to the link");
                continue;
            }
            // NOTE(review): titles containing "Fit or" are deliberately excluded; the reason is
            // not visible in this file.
            if (fileName.contains("Fit or")) {
                continue;
            }
            System.out.println(fileName);
            download(src, new File(folder, fileName));
        }
    }

    /**
     * Derives the local file name for a link from the first {@code <strong>} element under the
     * link's parent.
     *
     * @param link the element carrying the {@code href}
     * @return the sanitised name ending in {@code .pdf}, or {@code null} when the link has no
     *         parent or the parent contains no {@code <strong>} element
     */
    private static String buildFileName(Element link) {
        Element parent = link.parent();
        if (parent == null) {
            return null;
        }
        Elements titles = parent.getElementsByTag("strong");
        if (titles.isEmpty()) {
            return null;
        }
        String fileName = titles.get(0).text()
                .replace(":", " ")
                .replace("\"", "")
                .replace("'", "")
                .replace("?", "");
        // Path separators and wildcard characters would otherwise break the output path.
        for (int i = 0; i < UNSAFE_FILE_NAME_CHARS.length(); i++) {
            fileName = fileName.replace(UNSAFE_FILE_NAME_CHARS.charAt(i), ' ');
        }
        if (!fileName.endsWith(".")) {
            fileName = fileName.concat(".");
        }
        return fileName.concat("pdf");
    }

    /**
     * Copies the content behind {@code src} into {@code target}. Any failure is reported on
     * stderr and the link is skipped, so one bad link does not abort the remaining downloads.
     *
     * @param src    address to download, as found in the {@code href} attribute
     * @param target file to write; an existing file is overwritten
     */
    private static void download(String src, File target) {
        InputStream opened;
        try {
            // Opened before the output file so an unreachable link never truncates an
            // existing download. RuntimeException is included to keep the original
            // catch-all, best-effort skip.
            opened = new URL(src).openStream();
        } catch (IOException | RuntimeException e) {
            System.err.println("Skipping " + src + ": " + e);
            return;
        }
        try (InputStream in = opened;
                OutputStream out = new BufferedOutputStream(new FileOutputStream(target))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            for (int n; (n = in.read(buffer)) != -1;) {
                out.write(buffer, 0, n);
            }
        } catch (IOException e) {
            System.err.println("Failed to save " + src + " to " + target
                    + " (file may be incomplete): " + e);
        }
    }
}