package moviescraper.doctord.controller.siteparsingprofile.specific;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.net.URLCodec;
import org.apache.commons.io.FilenameUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import moviescraper.doctord.controller.siteparsingprofile.SiteParsingProfile;
import moviescraper.doctord.model.Movie;
import moviescraper.doctord.model.SearchResult;
import moviescraper.doctord.model.dataitem.Actor;
import moviescraper.doctord.model.dataitem.Director;
import moviescraper.doctord.model.dataitem.Genre;
import moviescraper.doctord.model.dataitem.ID;
import moviescraper.doctord.model.dataitem.MPAARating;
import moviescraper.doctord.model.dataitem.OriginalTitle;
import moviescraper.doctord.model.dataitem.Outline;
import moviescraper.doctord.model.dataitem.Plot;
import moviescraper.doctord.model.dataitem.Rating;
import moviescraper.doctord.model.dataitem.ReleaseDate;
import moviescraper.doctord.model.dataitem.Runtime;
import moviescraper.doctord.model.dataitem.Set;
import moviescraper.doctord.model.dataitem.SortTitle;
import moviescraper.doctord.model.dataitem.Studio;
import moviescraper.doctord.model.dataitem.Tagline;
import moviescraper.doctord.model.dataitem.Thumb;
import moviescraper.doctord.model.dataitem.Title;
import moviescraper.doctord.model.dataitem.Top250;
import moviescraper.doctord.model.dataitem.Votes;
import moviescraper.doctord.model.dataitem.Year;
/**
 * Site parsing profile for Excalibur Films movie pages.
 * <p>
 * Most fields are scraped from the currently loaded {@code document} with
 * jsoup selectors keyed on the visible label text (for example
 * {@code "Released:"} or {@code "Starring:"}). Image locations are derived
 * from the movie ID, which is itself parsed out of the page URL.
 */
public class ExcaliburFilmsParsingProfile extends SiteParsingProfile implements SpecificProfile {

	/**
	 * Returns the scraper groups this profile belongs to, creating the list
	 * on first use.
	 */
	@Override
	public List<ScraperGroupName> getScraperGroupNames() {
		if (groupNames == null)
			groupNames = Arrays.asList(ScraperGroupName.AMERICAN_ADULT_DVD_SCRAPER_GROUP);
		return groupNames;
	}

	/**
	 * Scrapes the title from the page's {@code <title>} element, with the
	 * first "Adult DVD" and "Blu-Ray" markers removed.
	 *
	 * @return the cleaned title, or an empty title when the page has no
	 *         {@code <title>} element
	 */
	@Override
	public Title scrapeTitle() {
		Element titleElement = document.select("title").first();
		if (titleElement != null) {
			String titleText = titleElement.text();
			titleText = titleText.replaceFirst("Adult DVD", "");
			titleText = titleText.replaceFirst("Blu-Ray", "");
			// removing the markers leaves stray whitespace behind, so trim it off
			return new Title(titleText.trim());
		}
		return new Title("");
	}

	/**
	 * Returns the scraped title as the original title.
	 */
	@Override
	public OriginalTitle scrapeOriginalTitle() {
		return new OriginalTitle(scrapeTitle().getTitle());
	}

	/**
	 * Always returns a blank sort title.
	 */
	@Override
	public SortTitle scrapeSortTitle() {
		return SortTitle.BLANK_SORTTITLE;
	}

	/**
	 * Always returns a blank set.
	 */
	@Override
	public Set scrapeSet() {
		//Excalibur doesn't have set info
		return Set.BLANK_SET;
	}

	/**
	 * Always returns a blank rating.
	 */
	@Override
	public Rating scrapeRating() {
		//Excalibur doesn't have rating info
		return Rating.BLANK_RATING;
	}

	/**
	 * Scrapes the release date from the element following the "Released:"
	 * label, parsed with the pattern {@code MM/dd/yyyy}.
	 *
	 * @return the release date, or a blank release date when the label is
	 *         not on the page
	 */
	@Override
	public ReleaseDate scrapeReleaseDate() {
		Element releaseDateElement = document.select("font:containsOwn(Released:) + font").first();
		if (releaseDateElement != null) {
			return new ReleaseDate(releaseDateElement.text(), new SimpleDateFormat("MM/dd/yyyy", Locale.ENGLISH));
		}
		return ReleaseDate.BLANK_RELEASEDATE;
	}

	/**
	 * Returns the year of the scraped release date.
	 */
	@Override
	public Year scrapeYear() {
		return scrapeReleaseDate().getYear();
	}

	/**
	 * Always returns a blank top 250 value.
	 */
	@Override
	public Top250 scrapeTop250() {
		//Excalibur doesn't have this info
		return Top250.BLANK_TOP250;
	}

	/**
	 * Always returns a blank vote count.
	 */
	@Override
	public Votes scrapeVotes() {
		//Excalibur doesn't have this info
		return Votes.BLANK_VOTES;
	}

	/**
	 * Always returns a blank outline.
	 */
	@Override
	public Outline scrapeOutline() {
		//Excalibur doesn't have this info
		return Outline.BLANK_OUTLINE;
	}

	/**
	 * Scrapes the plot from the element following the "Description:" link.
	 * A single pair of enclosing double quotes is stripped.
	 *
	 * @return the plot, or a blank plot when no description is on the page
	 */
	@Override
	public Plot scrapePlot() {
		Element plotElement = document.select("a:has(font b:containsOwn(Description:)) + font").first();
		if (plotElement != null) {
			String plotElementText = plotElement.text().trim();
			//They like to include their plot descriptions within quotes, so we can remove those quotes
			if (plotElementText.startsWith("\"") && plotElementText.endsWith("\"") && plotElementText.length() > 2) {
				plotElementText = plotElementText.substring(1, plotElementText.length() - 1);
			}
			return new Plot(plotElementText);
		}
		return Plot.BLANK_PLOT;
	}

	/**
	 * Always returns a blank tagline.
	 */
	@Override
	public Tagline scrapeTagline() {
		//Excalibur doesn't have this information
		return Tagline.BLANK_TAGLINE;
	}

	/**
	 * Scrapes the runtime from the element following the "Run Time:" label,
	 * with the " min." suffix removed.
	 *
	 * @return the runtime, or a blank runtime when the label is not on the page
	 */
	@Override
	public Runtime scrapeRuntime() {
		Element runtimeElement = document.select("font:containsOwn(Run Time:) + font").first();
		if (runtimeElement != null) {
			String runtimeText = runtimeElement.text().replace(" min.", "");
			return new Runtime(runtimeText);
		}
		return Runtime.BLANK_RUNTIME;
	}

	/**
	 * Builds the front cover poster from the movie ID.
	 *
	 * @return a one-element array holding the poster, or an empty array when
	 *         no ID is available or the poster URL is malformed
	 */
	@Override
	public Thumb[] scrapePosters() {
		String movieID = scrapeID().getId();
		String thumbPath = getPosterPathFromIDString(movieID);
		if (thumbPath == null)
			return new Thumb[0];
		try {
			Thumb posterThumb = new Thumb(thumbPath);
			return new Thumb[] { posterThumb };
		} catch (MalformedURLException e) {
			e.printStackTrace();
			return new Thumb[0];
		}
	}

	/**
	 * Builds the URL of the large front cover image for a movie ID.
	 *
	 * @param movieID the movie ID; may be null
	 * @return the image URL, or null when the ID is null or empty
	 */
	private String getPosterPathFromIDString(String movieID) {
		if (movieID == null || movieID.isEmpty())
			return null;
		return "http://images.excaliburfilms.com/DVD/reviews/imagesBB020609/largemoviepic/dvd_" + movieID + ".jpg";
	}

	/**
	 * Builds the URL of the small preview cover image for a movie ID.
	 *
	 * @param movieID the movie ID; may be null
	 * @return the image URL, or null when the ID is null or empty
	 */
	private String getPosterPreviewPathFromIDString(String movieID) {
		if (movieID == null || movieID.isEmpty())
			return null;
		return "http://images.excaliburfilms.com/dvd/dvdicon2/dvd_" + movieID + ".jpg";
	}

	/**
	 * Always returns an empty array.
	 */
	@Override
	public Thumb[] scrapeFanart() {
		//No Fanart on this site
		return new Thumb[0];
	}

	/**
	 * We return the back cover as the extrafanart for Excalibur Films
	 *
	 * @return a one-element array holding the back cover, or an empty array
	 *         when no ID is available or the image URL is malformed
	 */
	@Override
	public Thumb[] scrapeExtraFanart() {
		String movieID = scrapeID().getId();
		// without an ID the URL below would point at a nonexistent image
		if (movieID == null || movieID.isEmpty())
			return new Thumb[0];
		String thumbPath = "http://images.excaliburfilms.com/DVD/reviews/imagesBB020609/largemoviepic/dvd_" + movieID + "-b.jpg";
		try {
			Thumb backCoverThumb = new Thumb(thumbPath);
			return new Thumb[] { backCoverThumb };
		} catch (MalformedURLException e) {
			e.printStackTrace();
			return new Thumb[0];
		}
	}

	/**
	 * Scrapes the MPAA rating from the link following the "Rated:" label.
	 *
	 * @return the rating, or a blank rating when the label is not on the page
	 */
	@Override
	public MPAARating scrapeMPAA() {
		Element mpaaRatingElement = document.select("font:containsOwn(Rated:) + font a").first();
		if (mpaaRatingElement != null) {
			String mpaaRatingText = mpaaRatingElement.text();
			return new MPAARating(mpaaRatingText);
		}
		return MPAARating.BLANK_RATING;
	}

	/**
	 * Derives the movie ID from the URL of the loaded document.
	 *
	 * @return the ID, or a blank ID when the URL does not contain one
	 */
	@Override
	public ID scrapeID() {
		String id = getIDStringFromDocumentLocation(document);
		if (id != null) {
			return new ID(id);
		}
		return ID.BLANK_ID;
	}

	/**
	 * Extracts the movie ID from a document's location. The ID is the text
	 * between the last {@code '/'} and the first {@code '_'} that follows it,
	 * and is only looked for in locations containing {@code ".htm"}.
	 *
	 * @param doc the document whose location is inspected; may be null
	 * @return the ID, or null when the document is null or its location does
	 *         not have the expected shape
	 */
	private String getIDStringFromDocumentLocation(Document doc) {
		if (doc == null)
			return null;
		String location = doc.location();
		if (location == null || !location.contains(".htm"))
			return null;
		int idStart = location.lastIndexOf('/') + 1;
		if (idStart == 0)
			return null; // no '/' in the location
		// Only an underscore in the final path segment delimits the ID; one
		// occurring earlier in the URL must not be used as the end index.
		int idEnd = location.indexOf('_', idStart);
		if (idEnd < 0)
			return null;
		return location.substring(idStart, idEnd);
	}

	/**
	 * Scrapes a single genre from the link following the "Fetish:" label.
	 * The value "BluRay" is ignored.
	 *
	 * @return a list with at most one genre
	 */
	@Override
	public ArrayList<Genre> scrapeGenres() {
		ArrayList<Genre> genreList = new ArrayList<>();
		Element genreElement = document.select("font:containsOwn(Fetish:) + a").first();
		if (genreElement != null) {
			String genreText = genreElement.text();
			if (genreText.length() > 0 && !genreText.equals("BluRay")) {
				genreList.add(new Genre(genreText));
			}
		}
		return genreList;
	}

	/**
	 * Scrapes the actors from the element following the "Starring:" label.
	 * Linked actors are read from the anchors and get a thumbnail derived
	 * from their page URL; the remaining actors are read from the element's
	 * own comma separated text and get no thumbnail.
	 *
	 * @return the actors, or an empty list when the label is not on the page
	 */
	@Override
	public ArrayList<Actor> scrapeActors() {
		ArrayList<Actor> actorList = new ArrayList<>();
		Element firstActorList = document.select("font:containsOwn(Starring:) + font").first();
		if (firstActorList == null)
			return actorList;

		Elements actorListElements = firstActorList.select("a");
		for (Element currentActor : actorListElements) {
			String actorName = currentActor.text();
			// an anchor without text gives us nothing to name the actor with
			if (actorName.trim().isEmpty())
				continue;
			// the thumb is null when no usable URL could be built from the link
			Thumb actorThumb = getThumbForPersonPageUrl(currentActor.attr("href"));
			actorList.add(new Actor(actorName, "", actorThumb));
		}

		//get no image actors
		String firstActorListText = firstActorList.ownText();
		if (firstActorListText.length() > 0) {
			String[] currentActorTextSplitByComma = firstActorListText.trim().split(",");
			for (String currentNoThumbActor : currentActorTextSplitByComma) {
				String actorName = currentNoThumbActor.trim();
				//last actor in the list has a period since the list is in sentence form, so we want to get rid of that
				if (actorName.endsWith("."))
					actorName = actorName.substring(0, actorName.length() - 1);
				if (actorName.trim().isEmpty())
					continue;
				//we already have some of the actors if they were added in the thumb version, so check before adding them again
				boolean hadThisActorAlready = false;
				for (Actor existingActor : actorList) {
					if (actorName.equals(existingActor.getName())) {
						hadThisActorAlready = true;
						break;
					}
				}
				if (!hadThisActorAlready)
					actorList.add(new Actor(actorName, "", null));
			}
		}
		return actorList;
	}

	/**
	 * Builds the thumbnail for a person from the URL of that person's page.
	 * The page file name, without {@code ".htm"}, is used as the image name.
	 *
	 * @param personPageUrl the value of the person link's href; may be null
	 * @return the thumbnail, or null when the URL is null, holds no page name
	 *         or yields a malformed image URL
	 */
	private Thumb getThumbForPersonPageUrl(String personPageUrl) {
		if (personPageUrl == null)
			return null;
		// lastIndexOf gives -1 for a URL without '/', in which case the whole string is the page name
		String pageName = personPageUrl.substring(personPageUrl.lastIndexOf('/') + 1).replace(".htm", "");
		if (pageName.isEmpty())
			return null;
		String actorThumbURL = "http://Images.ExcaliburFilms.com/pornlist/starpicsAA020309/" + pageName + ".jpg";
		try {
			return new Thumb(actorThumbURL);
		} catch (MalformedURLException e) {
			return null;
		}
	}

	/**
	 * Scrapes the director from the link following the "Director:" label.
	 *
	 * @return a list with at most one director
	 */
	@Override
	public ArrayList<Director> scrapeDirectors() {
		ArrayList<Director> directorList = new ArrayList<>();
		Element directorElement = document.select("font:containsOwn(Director:) + a").first();
		if (directorElement != null) {
			String directorName = directorElement.text();
			// the helper copes with a missing or empty href and then gives us no thumb
			Thumb directorThumb = getThumbForPersonPageUrl(directorElement.attr("href"));
			directorList.add(new Director(directorName, directorThumb));
		}
		return directorList;
	}

	/**
	 * Scrapes the studio from the link following the "By:" label.
	 *
	 * @return the studio, or a blank studio when the label is not on the page
	 */
	@Override
	public Studio scrapeStudio() {
		Element studioElement = document.select("font:containsOwn(By:) + a").first();
		if (studioElement != null) {
			String studioText = studioElement.text();
			return new Studio(studioText);
		}
		return Studio.BLANK_STUDIO;
	}

	/**
	 * Creates the search URL for a movie file or directory. The search term
	 * is the unstacked base name of a file, or the name of a directory, with
	 * a trailing year such as {@code (2012)} or {@code [2012]} removed.
	 *
	 * @param file the movie file or directory to search for
	 * @return the URL of the search request
	 */
	@Override
	public String createSearchString(File file) {
		String fileBaseName;
		if (file.isFile())
			fileBaseName = FilenameUtils.getBaseName(Movie.getUnstackedMovieName(file));
		else
			fileBaseName = file.getName();
		String[] splitBySpace = fileBaseName.split(" ");
		if (splitBySpace.length > 1) {
			// check if last word in filename contains a year like (2012) or [2012]
			// we want to remove this from our search because it freaks out the search on excalibur films and gives no results
			String lastWord = splitBySpace[splitBySpace.length - 1];
			if (lastWord.matches("[\\(\\[]\\d{4}[\\)\\]]")) {
				// cut at the last word itself so that an earlier year-like token in the name is kept
				fileBaseName = fileBaseName.substring(0, fileBaseName.lastIndexOf(lastWord)).trim();
			}
		}
		URLCodec codec = new URLCodec();
		try {
			fileBaseName = codec.encode(fileBaseName);
		} catch (EncoderException e) {
			// fall through and search with the unencoded name
			e.printStackTrace();
		}
		return "http://www.excaliburfilms.com/search/adultSearch.htm?searchString=" + fileBaseName
				+ "&Case=ExcalMovies&Search=AdultDVDMovies&SearchFor=Title.x";
	}

	/**
	 * Runs a search and returns the movies found. When the request ends on a
	 * page other than the search results page, that page is returned as the
	 * only result.
	 *
	 * @param searchString the URL of the search request
	 * @return the search results, possibly empty
	 * @throws IOException if the page cannot be fetched or a preview image
	 *         URL is malformed
	 */
	@Override
	public SearchResult[] getSearchResults(String searchString) throws IOException {
		Document doc = Jsoup.connect(searchString).timeout(CONNECTION_TIMEOUT_VALUE).get();
		String location = doc.location();
		boolean onSearchResultsPage = location != null && location.contains("adultSearch.htm");
		//found the movie without a search results page
		if (location != null && !onSearchResultsPage) {
			String idOfPage = getIDStringFromDocumentLocation(doc);
			String posterPath = getPosterPreviewPathFromIDString(idOfPage);
			Element titleElement = doc.select("title").first();
			String label = (titleElement != null) ? titleElement.text() : "";
			// only build the preview when an image path could be derived from the page ID
			Thumb previewImage = (posterPath != null) ? new Thumb(posterPath) : null;
			SearchResult result = new SearchResult(location, label, previewImage);
			return new SearchResult[] { result };
		}
		//This selector in particular tends to break when they update their site.
		//Unfortunately, they don't use things like ids or classes much which makes it hard to get the right element without resorting to
		//hackery like width=600 stuff
		Elements foundMovies = doc.select("table[width=600]:contains(Wish List) tr tbody:has(img)");
		List<SearchResult> searchList = new LinkedList<>();
		for (Element movie : foundMovies) {
			Element link = movie.select("a").first();
			Element image = movie.select("img").first();
			// skip rows that lack the link or image a result is built from
			if (link == null || image == null)
				continue;
			String urlPath = link.attr("href");
			String thumb = image.attr("src");
			String label = image.attr("alt");
			SearchResult searchResult = new SearchResult(urlPath, label, new Thumb(thumb));
			if (!searchList.contains(searchResult))
				searchList.add(searchResult);
		}
		return searchList.toArray(new SearchResult[searchList.size()]);
	}

	/**
	 * Creates a fresh instance of this profile.
	 */
	@Override
	public SiteParsingProfile newInstance() {
		return new ExcaliburFilmsParsingProfile();
	}

	/**
	 * Returns the display name of this parser.
	 */
	@Override
	public String getParserName() {
		return "Excalibur Films";
	}
}