package com.github.vedenin.url_parser;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Tag;
import org.jsoup.select.Elements;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.util.ArrayList;
import java.util.List;
/**
* JSoup Hello World
*
* Created by vedenin on 07.04.16.
*/
public class GithubDownLoadTests {
private final static String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36";
private static void initHTTPSDownload() throws Exception {
// Create a new trust manager that trust all certificates
TrustManager[] trustAllCerts = new TrustManager[]{
new X509TrustManager() {
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
return null;
}
public void checkClientTrusted(
java.security.cert.X509Certificate[] certs, String authType) {
}
public void checkServerTrusted(
java.security.cert.X509Certificate[] certs, String authType) {
}
}
};
// Activate the new trust manager
try {
SSLContext sc = SSLContext.getInstance("SSL");
sc.init(null, trustAllCerts, new java.security.SecureRandom());
HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
} catch (Exception e) {
System.out.print(e.getMessage());
}
}
private static void testHtmlParser(String url) throws Exception {
Document doc = Jsoup.connect(url).userAgent(USER_AGENT).cookie("auth", "token")
.timeout(30000).get();
Elements div = doc.select("#readme");
//printElements(div);
work(div);
}
private static void printElements(Elements children) {
for(Element child: children) {
if(!child.text().isEmpty()) {
System.out.print(child.tag().getName() + " : ");
System.out.println(child.text());
}
printElements(child.children());
}
}
public static void main(String[] s) throws Exception {
initHTTPSDownload();
testHtmlParser("https://github.com/Vedenin/useful-java-links/blob/master/readme.md");
}
private static final Tag H1 = Tag.valueOf("h1");
private static final Tag H2 = Tag.valueOf("h2");
private static final Tag H3 = Tag.valueOf("h3");
private static final Tag H4 = Tag.valueOf("h4");
private static final Tag H5 = Tag.valueOf("h5");
private static final Tag H6 = Tag.valueOf("h6");
private static boolean isHeader(Tag tag) {
return H1.equals(tag) || H2.equals(tag) || H3.equals(tag) || H4.equals(tag) || H5.equals(tag) || H6.equals(tag);
}
private static List<LinkContainer> work(Elements elements) {
List<LinkContainer> result = new ArrayList<>(elements.size());
String currentCategory = null;
for(Element element: elements) {
Tag tag = element.tag();
if(isHeader(tag)) {
currentCategory = element.text();
System.out.println(currentCategory);
}
work(element.children());
}
return result;
}
private static class LinkContainer {
private String category;
private String name;
private String url;
private String description;
private String star;
private String stackOverflow;
private String license;
private String licenseUrl;
private String site;
@Override
public String toString() {
return "{" +
"category='" + category + '\'' +
", name='" + name + '\'' +
", url='" + url + '\'' +
", description='" + description + '\'' +
", star='" + star + '\'' +
", stackOverflow='" + stackOverflow + '\'' +
", license='" + license + '\'' +
", licenseUrl='" + licenseUrl + '\'' +
", site='" + site + '\'' +
'}';
}
}
}