package moviescraper.doctord.controller.siteparsingprofile.specific;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.net.URLCodec;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import moviescraper.doctord.controller.siteparsingprofile.SiteParsingProfile;
import moviescraper.doctord.model.SearchResult;
import moviescraper.doctord.model.dataitem.Actor;
import moviescraper.doctord.model.dataitem.Director;
import moviescraper.doctord.model.dataitem.Genre;
import moviescraper.doctord.model.dataitem.ID;
import moviescraper.doctord.model.dataitem.MPAARating;
import moviescraper.doctord.model.dataitem.OriginalTitle;
import moviescraper.doctord.model.dataitem.Outline;
import moviescraper.doctord.model.dataitem.Plot;
import moviescraper.doctord.model.dataitem.Rating;
import moviescraper.doctord.model.dataitem.ReleaseDate;
import moviescraper.doctord.model.dataitem.Runtime;
import moviescraper.doctord.model.dataitem.Set;
import moviescraper.doctord.model.dataitem.SortTitle;
import moviescraper.doctord.model.dataitem.Studio;
import moviescraper.doctord.model.dataitem.Tagline;
import moviescraper.doctord.model.dataitem.Thumb;
import moviescraper.doctord.model.dataitem.Title;
import moviescraper.doctord.model.dataitem.Top250;
import moviescraper.doctord.model.dataitem.Votes;
import moviescraper.doctord.model.dataitem.Year;
public class IAFDParsingProfile extends SiteParsingProfile implements SpecificProfile {
boolean useSiteSearch = true;
String yearFromFilename = "";
String fileName;
@Override
public List<ScraperGroupName> getScraperGroupNames()
{
if(groupNames == null)
groupNames = Arrays.asList(ScraperGroupName.AMERICAN_ADULT_DVD_SCRAPER_GROUP);
return groupNames;
}
@Override
public Title scrapeTitle() {
Element titleElement = document.select(getTitleElementSelector()).first();
System.out.println(titleElement);
//System.out.println(document);
System.out.println("Scraping title");
if(titleElement != null)
{
String titleText = titleElement.text().trim();
//remove year like (2015) from text
if(titleText.matches(".+\\(\\d{4}\\)"))
{
titleText = StringUtils.substringBeforeLast(titleText, " ");
}
//titleText = titleText.replaceFirst("(\\d{4}", "");
//System.err.println("New title text = " + titleText);
return new Title(titleText);
}
else return new Title("");
}
private String getTitleElementSelector()
{
return "div.col-sm-12 h1";
}
@Override
public OriginalTitle scrapeOriginalTitle() {
return OriginalTitle.BLANK_ORIGINALTITLE;
}
@Override
public SortTitle scrapeSortTitle() {
return SortTitle.BLANK_SORTTITLE;
}
@Override
public Set scrapeSet() {
Element setElement = findSidebarElement("Studio");
if(setElement != null && setElement.text().contains(".com"))
{
return new Set(setElement.text());
}
else return Set.BLANK_SET;
}
@Override
public Rating scrapeRating() {
// TODO Auto-generated method stub
return Rating.BLANK_RATING;
}
@Override
public Year scrapeYear() {
String yearText = "year=";
String uri = document.baseUri();
int indexOf = uri.indexOf(yearText) + yearText.length();
String releaseDateText = uri.substring(indexOf, indexOf + 4);
if(releaseDateText.length() == 4)
{
return new Year(releaseDateText);
}
else return Year.BLANK_YEAR;
}
@Override
public ReleaseDate scrapeReleaseDate() {
//I don't think IAFD has the month or day a movie was released - only the year
//In some rare cases they have this information in the comments. There is also a release date field
//in the side bar that always seems to be blank. Maybe they are going to populate this in the future?
//TODO: get this info out of the comments field. this info may be inconsistently formatted, so watch out
return ReleaseDate.BLANK_RELEASEDATE;
}
@Override
public Top250 scrapeTop250() {
// TODO Auto-generated method stub
return Top250.BLANK_TOP250;
}
@Override
public Votes scrapeVotes() {
// TODO Auto-generated method stub
return Votes.BLANK_VOTES;
}
@Override
public Outline scrapeOutline() {
// TODO Auto-generated method stub
return Outline.BLANK_OUTLINE;
}
@Override
public Plot scrapePlot() {
Element plotElement = document.select("div#sceneinfo ul").first();
if(plotElement != null)
{
//try to put each scene on its own new line
Elements sceneBreakdown = plotElement.select("li");
if(sceneBreakdown != null)
{
String sceneText = "";
for(Element scene : sceneBreakdown)
{
sceneText += scene.text() + "\n";
}
return new Plot(sceneText);
}
else
{
return new Plot(plotElement.text());
}
}
return Plot.BLANK_PLOT;
}
@Override
public Tagline scrapeTagline() {
// TODO Auto-generated method stub
return Tagline.BLANK_TAGLINE;
}
@Override
public Runtime scrapeRuntime() {
Element runtimeElement = findSidebarElement("Minutes");
if(runtimeElement != null)
{
return new Runtime(runtimeElement.text());
}
else return Runtime.BLANK_RUNTIME;
}
@Override
public Thumb[] scrapePosters() {
Element posterElement = document.select("a[rel=covers]").first();
if(posterElement != null)
{
Thumb[] posterThumbs = new Thumb[1];
try {
posterThumbs[0] = new Thumb(posterElement.attr("href"));
return posterThumbs;
} catch (MalformedURLException e) {
e.printStackTrace();
return new Thumb[0];
}
}
return new Thumb[0];
}
@Override
public Thumb[] scrapeFanart() {
return new Thumb[0];
}
@Override
public Thumb[] scrapeExtraFanart() {
ArrayList<Thumb> extraFanart = new ArrayList<>();
return extraFanart.toArray(new Thumb[extraFanart.size()]);
}
@Override
public MPAARating scrapeMPAA() {
// TODO Auto-generated method stub
return MPAARating.RATING_XXX;
}
@Override
public ID scrapeID() {
return ID.BLANK_ID;
}
@Override
public ArrayList<Genre> scrapeGenres() {
ArrayList<Genre> genreList = new ArrayList<>();
//No Genres in IAFD
return genreList;
}
@Override
public ArrayList<Actor> scrapeActors() {
Elements actorElements = document.select("div.castbox:not(.nonsex) a"); //performers who are not just extras, etc
ArrayList<Actor> actorList = new ArrayList<>();
if(actorElements != null)
{
for(Element currentActorElement : actorElements)
{
String actorName = currentActorElement.ownText();
String actorAlias = "(as ";
int indexOfAs = actorName.indexOf(actorAlias);
if ( indexOfAs >= 0 ) {
actorName = actorName.substring(indexOfAs + actorAlias.length(), actorName.lastIndexOf(")") );
}
Element actorPicture = currentActorElement.select("img").first();
if (actorPicture == null)
continue; //found something like "Non Sex Performers" Text between actors
String actorThumbnail = actorPicture.absUrl("src");
//case with actor with thumbnail
if(actorThumbnail != null && !actorThumbnail.contains("nophoto"))
{
try {
actorThumbnail = actorThumbnail.replaceFirst(Pattern.quote("/60/"), "/120/");
actorList.add(new Actor(actorName, null, new Thumb(actorThumbnail)));
} catch (MalformedURLException e) {
actorList.add(new Actor(actorName, null, null));
e.printStackTrace();
}
}
//add the actor with no thumbnail
else
{
actorList.add(new Actor(actorName, null, null));
}
}
}
return actorList;
}
@Override
public ArrayList<Director> scrapeDirectors() {
ArrayList<Director> directorList = new ArrayList<>();
Element directorElement = findSidebarElement("Director");
if(directorElement != null)
{
String directorName = directorElement.text().trim();
if(directorName != null && directorName.length() > 0 && !directorName.equals("Unknown"))
directorList.add(new Director(directorName,null));
}
return directorList;
}
@Override
public Studio scrapeStudio() {
Element studioElement = findSidebarElement("Distributor");
if(studioElement != null)
{
String studioText = studioElement.text().trim();
if(studioText != null && studioText.length() > 0)
return new Studio(studioText);
}
return Studio.BLANK_STUDIO;
}
@Override
public String createSearchString(File file) {
scrapedMovieFile = file;
String fileBaseName;
if(file.isFile())
fileBaseName = FilenameUtils.getBaseName(file.getName());
else
fileBaseName = file.getName();
fileBaseName = fileBaseName.replaceFirst("\\s?CD[1234]", "");
fileName = fileBaseName;
String [] splitBySpace = fileBaseName.split(" ");
if(splitBySpace.length > 1)
{
//check if last word in filename contains a year like (2012) or [2012]
if(splitBySpace[splitBySpace.length-1].matches("[\\(\\[]\\d{4}[\\)\\]]"))
{
yearFromFilename = splitBySpace[splitBySpace.length-1].replaceAll("[\\(\\[\\)\\]]", "");
fileBaseName = fileBaseName.replaceFirst("[\\(\\[]\\d{4}[\\)\\]]","").trim();
}
}
if(useSiteSearch)
{
URLCodec codec = new URLCodec();
try {
fileBaseName = codec.encode(fileBaseName);
} catch (EncoderException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
fileBaseName = "http://www.iafd.com/results.asp?searchtype=comprehensive&searchstring=" + fileBaseName;
return fileBaseName;
}
return FilenameUtils.getBaseName(file.getName());
}
@Override
public SearchResult[] getSearchResults(String searchString)
throws IOException {
if(useSiteSearch)
{
ArrayList<SearchResult> linksList = new ArrayList<>();
Document doc = Jsoup.connect(searchString).userAgent(getRandomUserAgent()).referrer("http://www.iafd.com").ignoreHttpErrors(true).timeout(SiteParsingProfile.CONNECTION_TIMEOUT_VALUE).get();
//check to see if we directly found the title
if(doc != null && doc.location().contains("title.asp?title="))
{
String title = doc.select(getTitleElementSelector()).first().text();
linksList.add(new SearchResult(doc.location(), title));
}
Elements movieSearchResultElements = null;
if(doc != null) {
movieSearchResultElements = doc.select("table#titleresult tr td a[href*=title.rme");
}
if(linksList.size() == 0 && (movieSearchResultElements == null || movieSearchResultElements.size() == 0))
{
this.useSiteSearch = false;
return getLinksFromGoogle(fileName, "www.iafd.com/title.rme");
}
else if(movieSearchResultElements != null)
{
for(Element currentMovie : movieSearchResultElements)
{
String currentMovieURL = currentMovie.absUrl("href");
String currentMovieTitle = currentMovie.text();
final String searchForYearText = "year=";
int index = currentMovieURL.indexOf(searchForYearText) + searchForYearText.length();
String releaseDateText = currentMovieURL.substring(index, index+4);
if(releaseDateText != null && releaseDateText.length() > 0)
currentMovieTitle = currentMovieTitle + " (" + releaseDateText + ")";
Thumb currentMovieThumb = new Thumb(currentMovie.select("img").attr("src"));
linksList.add(new SearchResult(currentMovieURL, currentMovieTitle, currentMovieThumb));
}
return linksList.toArray(new SearchResult[linksList.size()]);
}
return linksList.toArray(new SearchResult[linksList.size()]);
}
else
{
this.useSiteSearch = false;
return getLinksFromGoogle(searchString, "www.iafd.com/title.rme");
}
}
@Override
public String toString(){
return "IAFD";
}
@Override
public SiteParsingProfile newInstance() {
return new IAFDParsingProfile();
}
@Override
public String getParserName() {
return "IAFD";
}
private Element findSidebarElement(String textOfSideBarElement)
{
String selector = "p:containsOwn(" + textOfSideBarElement + ") + p.biodata";
System.out.println("selector = " + selector);
Element sidebarElement = document.select(selector).first();
return sidebarElement;
}
}