package moviescraper.doctord.controller.siteparsingprofile.specific; import java.net.MalformedURLException; import java.net.URL; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import moviescraper.doctord.controller.siteparsingprofile.SiteParsingProfile; import moviescraper.doctord.model.SearchResult; public class Data18SharedMethods { //tries to guess what the viewer url is from a passed in url public static URL getReferrerURLFromImageURL(URL url) { if (url != null && url.toString().endsWith(".jpg")) { int indexOfLastSlash = url.toString().lastIndexOf('/'); if (indexOfLastSlash > 0) { String tempUrlString = url.toString().substring(0,indexOfLastSlash); String imageNumber = url.toString().substring(indexOfLastSlash); if(imageNumber != null && imageNumber.endsWith(".jpg") && imageNumber.length() > 5) { imageNumber = imageNumber.substring(1,imageNumber.length()-4); } int indexOfSecondToLastSlash = tempUrlString.toString().lastIndexOf('/'); if (indexOfSecondToLastSlash > 0 && tempUrlString.length() > 1) { String contentID = tempUrlString.substring(indexOfSecondToLastSlash+1); URL referrerURL; try { referrerURL = new URL("http://www.data18.com/viewer/" + "1" + contentID + "/" + imageNumber); return referrerURL; } catch (MalformedURLException e) { e.printStackTrace(); } } } } return url; } //Used to implement the SecurityPassthrough interface for both data18 scrapers public static boolean requiresSecurityPassthrough(Document document) { if(document != null && document.html().contains("Security page: [data18.com]")) { System.out.println("Found security page for data 18; attempting to bypass"); return true; } return false; } //Used to implement the SecurityPassthrough interface for both data18 scrapers public static Document runSecurityPassthrough(Document document, SearchResult originalSearchResult) { //find the first link in the document, download the href, then try to download the original result again if(document != null) { Element firstLink = document.select("a").first(); if (firstLink != null && firstLink.attr("href") != null) { Document captchaSolved = SiteParsingProfile.downloadDocument(new SearchResult(firstLink.attr("href"))); if(captchaSolved != null) { return SiteParsingProfile.downloadDocument(originalSearchResult); } } } return document; } }