package moviescraper.doctord.controller.siteparsingprofile.specific;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.net.URLCodec;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import moviescraper.doctord.controller.languagetranslation.Language;
import moviescraper.doctord.controller.siteparsingprofile.SiteParsingProfile;
import moviescraper.doctord.model.SearchResult;
import moviescraper.doctord.model.dataitem.Actor;
import moviescraper.doctord.model.dataitem.Director;
import moviescraper.doctord.model.dataitem.Genre;
import moviescraper.doctord.model.dataitem.ID;
import moviescraper.doctord.model.dataitem.MPAARating;
import moviescraper.doctord.model.dataitem.OriginalTitle;
import moviescraper.doctord.model.dataitem.Outline;
import moviescraper.doctord.model.dataitem.Plot;
import moviescraper.doctord.model.dataitem.Rating;
import moviescraper.doctord.model.dataitem.ReleaseDate;
import moviescraper.doctord.model.dataitem.Runtime;
import moviescraper.doctord.model.dataitem.Set;
import moviescraper.doctord.model.dataitem.SortTitle;
import moviescraper.doctord.model.dataitem.Studio;
import moviescraper.doctord.model.dataitem.Tagline;
import moviescraper.doctord.model.dataitem.Thumb;
import moviescraper.doctord.model.dataitem.Title;
import moviescraper.doctord.model.dataitem.Top250;
import moviescraper.doctord.model.dataitem.Votes;
import moviescraper.doctord.model.dataitem.Year;
import moviescraper.doctord.model.preferences.MoviescraperPreferences;
/**
 * Site parsing profile that scrapes movie metadata from JavLibrary pages
 * (URLs of the form {@code http://www.javlibrary.com/<language code>/...}).
 *
 * <p>The profile works on the already-downloaded {@code document} held by the
 * superclass; only {@link #scrapeOriginalTitle()} and
 * {@link #getSearchResults(String)} perform network requests of their own.
 */
public class JavLibraryParsingProfile extends SiteParsingProfile implements SpecificProfile {

    /** URL path segment used for the English version of the site. */
    public static final String englishLanguageCode = "en";
    /** URL path segment used for the Japanese version of the site. */
    public static final String japaneseLanguageCode = "ja";
    /** Language code "tw"; not referenced elsewhere in this class. */
    public static final String taiwaneseLanguageCode = "tw";
    /** Language code "cn"; not referenced elsewhere in this class. */
    public static final String chineseLanguageCode = "cn";

    /** When true, "Lastname Firstname" names are flipped when scraping in English. */
    private static final boolean reverseAsianNameInEnglish = true;

    /**
     * Format of the release date shown on the page (yyyy-MM-dd).
     * NOTE(review): SimpleDateFormat is not thread-safe and this instance is shared
     * statically - confirm that ReleaseDate is never constructed concurrently.
     */
    private static final SimpleDateFormat javLibraryReleaseDateFormat = new SimpleDateFormat("yyyy-MM-dd", Locale.ENGLISH);

    /** Language path segment inserted into every JavLibrary URL this profile builds. */
    private String siteLanguageToScrape;

    /** Optional override URL; only stored and returned by this class. */
    private String overrideURLJavLibrary;

    /**
     * Creates a profile for an already-loaded page, choosing the site language
     * from the user preferences.
     *
     * @param document the page to scrape
     */
    public JavLibraryParsingProfile(Document document) {
        super(document);
        siteLanguageToScrape = determineLanguageToUse();
    }

    /** Creates a profile without a document, choosing the site language from the user preferences. */
    public JavLibraryParsingProfile() {
        siteLanguageToScrape = determineLanguageToUse();
    }

    /**
     * Creates a profile for an already-loaded page with an explicit site language.
     *
     * @param document the page to scrape
     * @param siteLanguageToScrape language path segment, e.g. {@link #englishLanguageCode}
     */
    public JavLibraryParsingProfile(Document document, String siteLanguageToScrape) {
        super(document);
        this.siteLanguageToScrape = siteLanguageToScrape;
    }

    /**
     * Creates a profile without a document but with an explicit site language.
     *
     * @param siteLanguageToScrape language path segment, e.g. {@link #englishLanguageCode}
     */
    public JavLibraryParsingProfile(String siteLanguageToScrape) {
        this.siteLanguageToScrape = siteLanguageToScrape;
    }

    /**
     * Picks Japanese when the "scrape in Japanese" preference is set, English otherwise.
     *
     * @return {@link #japaneseLanguageCode} or {@link #englishLanguageCode}
     */
    private String determineLanguageToUse() {
        if (MoviescraperPreferences.getInstance().getScrapeInJapanese()) {
            return japaneseLanguageCode;
        }
        return englishLanguageCode;
    }

    /**
     * Returns the scraper groups this profile belongs to, lazily initialising the
     * {@code groupNames} field (declared outside this class) on first use.
     */
    @Override
    public List<ScraperGroupName> getScraperGroupNames() {
        if (groupNames == null) {
            groupNames = Arrays.asList(ScraperGroupName.JAV_CENSORED_SCRAPER_GROUP);
        }
        return groupNames;
    }

    /** @return the override URL previously set, or null if none was set */
    public String getOverrideURLJavLibrary() {
        return overrideURLJavLibrary;
    }

    /** @param overrideURLJavLibrary the override URL to remember */
    public void setOverrideURLJavLibrary(String overrideURLJavLibrary) {
        this.overrideURLJavLibrary = overrideURLJavLibrary;
    }

    /**
     * Scrapes the title from the page heading, dropping the leading ID token and
     * an optional following "- " separator.
     *
     * @return the title, or an empty title when the heading link is missing
     */
    @Override
    public Title scrapeTitle() {
        Element titleElement = document.select("h3.post-title.text a").first();
        if (titleElement == null) {
            // this shouldn't really ever happen...
            return new Title("");
        }
        String titleText = titleElement.text().trim();
        // The heading normally starts with the ID followed by a space; drop that first token.
        // When there is no space at all, keep the text untouched instead of failing on substring(-1).
        int firstSpace = titleText.indexOf(' ');
        if (firstSpace >= 0) {
            titleText = titleText.substring(firstSpace).trim();
        }
        // sometimes this still leaves "- " at the start of the title, so get rid of that too
        if (titleText.startsWith("- ")) {
            titleText = titleText.substring(2);
        }
        return new Title(titleText);
    }

    /**
     * Scrapes the Japanese title. When this profile already scrapes the Japanese
     * site the regular title is reused; otherwise the Japanese variant of the
     * current page is downloaded and scraped.
     *
     * @return the original title, or {@code OriginalTitle.BLANK_ORIGINALTITLE} when
     *         the Japanese page could not be fetched
     */
    @Override
    public OriginalTitle scrapeOriginalTitle() {
        if (japaneseLanguageCode.equals(siteLanguageToScrape)) {
            return new OriginalTitle(scrapeTitle().getTitle());
        }
        try {
            // Swap the language path segment of the current URL for the Japanese one.
            String currentPrefix = "javlibrary.com/" + siteLanguageToScrape + "/";
            String japanesePrefix = "javlibrary.com/" + japaneseLanguageCode + "/";
            String japaneseUrl = document.location().replace(currentPrefix, japanesePrefix);
            Document japaneseDoc = Jsoup.connect(japaneseUrl)
                    .userAgent(getRandomUserAgent())
                    .timeout(CONNECTION_TIMEOUT_VALUE)
                    .get();
            JavLibraryParsingProfile japaneseProfile = new JavLibraryParsingProfile(japaneseDoc, japaneseLanguageCode);
            return japaneseProfile.scrapeOriginalTitle();
        } catch (IOException e) {
            // Best effort: a failed download only costs us the original title.
            e.printStackTrace();
        }
        return OriginalTitle.BLANK_ORIGINALTITLE;
    }

    /** @return a blank sort title; that is usually something the user provides */
    @Override
    public SortTitle scrapeSortTitle() {
        return SortTitle.BLANK_SORTTITLE;
    }

    /** @return a blank set; the site doesn't have any set information */
    @Override
    public Set scrapeSet() {
        return Set.BLANK_SET;
    }

    /**
     * Scrapes the score shown on the page. JavLibrary uses a decimal value out of
     * 10, displayed inside parentheses.
     *
     * @return the rating, or {@code Rating.BLANK_RATING} when no score is on the page
     */
    @Override
    public Rating scrapeRating() {
        Element ratingElement = document.select("span.score").first();
        if (ratingElement == null) {
            return Rating.BLANK_RATING;
        }
        String ratingText = ratingElement.text().trim();
        // Strip the surrounding parentheses only when both are really there, so an
        // unexpected value is neither mangled nor able to break substring().
        boolean wrappedInParentheses = ratingText.length() >= 2
                && ratingText.startsWith("(")
                && ratingText.endsWith(")");
        if (wrappedInParentheses) {
            ratingText = ratingText.substring(1, ratingText.length() - 1).trim();
        }
        return new Rating(10, ratingText);
    }

    /** @return the year taken from {@link #scrapeReleaseDate()} */
    @Override
    public Year scrapeYear() {
        return scrapeReleaseDate().getYear();
    }

    /**
     * Scrapes the release date, which the page shows in yyyy-MM-dd format.
     *
     * @return the release date, or {@code ReleaseDate.BLANK_RELEASEDATE} when the
     *         date cell is missing or empty
     */
    @Override
    public ReleaseDate scrapeReleaseDate() {
        Element dateElement = document.select("div#video_date tr td.header + td.text").first();
        if (dateElement == null) {
            return ReleaseDate.BLANK_RELEASEDATE;
        }
        String dateText = dateElement.text().trim();
        if (dateText.isEmpty()) {
            return ReleaseDate.BLANK_RELEASEDATE;
        }
        return new ReleaseDate(dateText, javLibraryReleaseDateFormat);
    }

    /** @return a blank value; this type of info doesn't exist on JavLibrary */
    @Override
    public Top250 scrapeTop250() {
        return Top250.BLANK_TOP250;
    }

    /** @return a blank value; votes are not scraped by this profile */
    @Override
    public Votes scrapeVotes() {
        return Votes.BLANK_VOTES;
    }

    /** @return a blank value; outlines are not scraped by this profile */
    @Override
    public Outline scrapeOutline() {
        return Outline.BLANK_OUTLINE;
    }

    /** @return a blank value; plots are not scraped by this profile */
    @Override
    public Plot scrapePlot() {
        return Plot.BLANK_PLOT;
    }

    /** @return a blank value; taglines are not scraped by this profile */
    @Override
    public Tagline scrapeTagline() {
        return Tagline.BLANK_TAGLINE;
    }

    /**
     * Scrapes the text of the length cell of the page.
     *
     * @return the runtime built from that text, or a runtime built from the empty
     *         string when the cell is missing
     */
    @Override
    public Runtime scrapeRuntime() {
        Element lengthElement = document.select("div#video_length tr td.header + td span.text").first();
        // A missing cell is treated exactly like an empty one.
        String lengthText = (lengthElement == null) ? "" : lengthElement.text();
        return new moviescraper.doctord.model.dataitem.Runtime(lengthText);
    }

    /** @return the jacket image created with the cropping flag set, or an empty array */
    @Override
    public Thumb[] scrapePosters() {
        return scrapePostersAndFanart(true);
    }

    /** @return the jacket image created without the cropping flag, or an empty array */
    @Override
    public Thumb[] scrapeFanart() {
        return scrapePostersAndFanart(false);
    }

    /**
     * Builds a one-element array holding the jacket image of the page.
     *
     * @param doCrop true to use the two-argument Thumb constructor with {@code true}
     *        (used for posters), false to use the plain one (used for fanart)
     * @return the image, or an empty array when the page has no jacket image or the
     *         Thumb could not be created
     */
    private Thumb[] scrapePostersAndFanart(boolean doCrop) {
        Element posterElement = document.select("img#video_jacket_img").first();
        if (posterElement == null) {
            return new Thumb[0];
        }
        String posterLink = posterElement.attr("src").trim();
        try {
            Thumb jacket;
            if (doCrop) {
                jacket = new Thumb(posterLink, true);
            } else {
                jacket = new Thumb(posterLink);
            }
            return new Thumb[] { jacket };
        } catch (IOException e) {
            // Best effort: without the image we simply report no artwork.
            e.printStackTrace();
            return new Thumb[0];
        }
    }

    /** @return always {@code MPAARating.RATING_XXX} */
    @Override
    public MPAARating scrapeMPAA() {
        return MPAARating.RATING_XXX;
    }

    /**
     * Scrapes the movie ID from the ID cell of the page.
     *
     * @return the ID, or {@code ID.BLANK_ID} when the cell is missing or empty
     */
    @Override
    public ID scrapeID() {
        Element idElement = document.select("div#video_id tr td.header + td.text").first();
        if (idElement == null) {
            return ID.BLANK_ID;
        }
        String idText = idElement.text();
        if (idText.isEmpty()) {
            return ID.BLANK_ID;
        }
        return new ID(idText);
    }

    /**
     * Scrapes every {@code .genre} element, skipping entries rejected by
     * {@link #acceptGenreText(String)}.
     *
     * @return the accepted genres in page order, possibly empty
     */
    @Override
    public ArrayList<Genre> scrapeGenres() {
        Elements genreElements = document.select(".genre");
        ArrayList<Genre> genreList = new ArrayList<>(genreElements.size());
        for (Element genreElement : genreElements) {
            String genreText = genreElement.text().trim();
            if (acceptGenreText(genreText)) {
                genreList.add(new Genre(genreText));
            }
        }
        return genreList;
    }

    /**
     * Filters out junk genres: sometimes JavLibrary lists things like
     * "Video Sample" that are not really genres.
     *
     * @param genreText trimmed genre label
     * @return false for the known junk labels, true otherwise
     */
    private boolean acceptGenreText(String genreText) {
        switch (genreText) {
            case "Video Sample":
            case "Blu-ray":
            case "With Gifts":
                return false;
            default:
                return true;
        }
    }

    /**
     * Scrapes the cast. Each actor name is followed by its aliases in parentheses,
     * e.g. {@code "Name (Alias1) (Alias2)"}. JavLibrary has asian names in
     * "Lastname Firstname" order; they are flipped when scraping in English.
     *
     * @return one actor per {@code span.cast} element, possibly empty
     */
    @Override
    public ArrayList<Actor> scrapeActors() {
        Elements castElements = document.select("span.cast");
        ArrayList<Actor> actorList = new ArrayList<>(castElements.size());
        // Compare the language code with equals(): the code may have been passed in
        // at runtime, in which case an identity comparison (==) would never match.
        // The same condition is used for the name and for its aliases so that they
        // always end up in the same order.
        boolean reverseNames = reverseAsianNameInEnglish
                && (englishLanguageCode.equals(siteLanguageToScrape) || scrapingLanguage == Language.ENGLISH);
        for (Element castElement : castElements) {
            String primaryName = castElement.select("span.star a").text().trim();
            StringBuilder displayName = new StringBuilder(applyNameOrder(primaryName, reverseNames));
            for (Element aliasElement : castElement.select("span.alias")) {
                String alias = applyNameOrder(aliasElement.text().trim(), reverseNames);
                displayName.append(" (").append(alias).append(")");
            }
            actorList.add(new Actor(displayName.toString(), "", null));
        }
        return actorList;
    }

    /**
     * Flips the space-delimited parts of a name when requested.
     *
     * @param name the name as shown on the page
     * @param reverse whether the parts should be reversed
     * @return the reversed name, or {@code name} itself when no reversal applies
     */
    private static String applyNameOrder(String name, boolean reverse) {
        if (reverse && name.contains(" ")) {
            return StringUtils.reverseDelimited(name, ' ');
        }
        return name;
    }

    /**
     * Scrapes the text of every {@code .director a} link.
     *
     * @return the directors in page order, possibly empty
     */
    @Override
    public ArrayList<Director> scrapeDirectors() {
        Elements directorElements = document.select(".director a");
        ArrayList<Director> directorList = new ArrayList<>(directorElements.size());
        for (Element directorElement : directorElements) {
            directorList.add(new Director(directorElement.text().trim(), null));
        }
        return directorList;
    }

    /**
     * Scrapes the text of the first {@code .maker a} link.
     *
     * @return the studio, or {@code Studio.BLANK_STUDIO} when the link is missing
     */
    @Override
    public Studio scrapeStudio() {
        Element studioElement = document.select(".maker a").first();
        if (studioElement == null) {
            return Studio.BLANK_STUDIO;
        }
        return new Studio(studioElement.text().trim());
    }

    /**
     * Builds the search-by-ID URL for a movie file and remembers the file in
     * {@code scrapedMovieFile}.
     *
     * @param file the movie file whose ID tag is searched for
     * @return the search URL, or null when the ID could not be URL-encoded
     */
    @Override
    public String createSearchString(File file) {
        scrapedMovieFile = file;
        String idTag = findIDTagFromFile(file, isFirstWordOfFileIsID());
        URLCodec codec = new URLCodec();
        try {
            String encodedIdTag = codec.encode(idTag);
            return "http://www.javlibrary.com/" + siteLanguageToScrape + "/vl_searchbyid.php?keyword=" + encodedIdTag;
        } catch (Exception e) {
            // Callers get null when encoding fails; getSearchResults() tolerates that.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Runs a search and collects the matching movie pages. If the site redirects
     * straight to a movie page, that page is the single result; otherwise only
     * entries of the result list whose ID exactly matches the searched keyword
     * are returned.
     *
     * @param searchString URL produced by {@link #createSearchString(File)}; may be null
     * @return the search results; empty when nothing matched, when
     *         {@code searchString} is null, or when the request or decoding failed
     * @throws IOException declared by the overridden method; failures inside this
     *         implementation are caught and reported as an empty result
     */
    @Override
    public SearchResult[] getSearchResults(String searchString) throws IOException {
        // createSearchString() returns null on failure; there is nothing to search for then.
        if (searchString == null) {
            return new SearchResult[0];
        }
        ArrayList<SearchResult> linksList = new ArrayList<>();
        String websiteURLBegin = "http://www.javlibrary.com/" + siteLanguageToScrape;
        try {
            Document doc = Jsoup.connect(searchString)
                    .userAgent("Mozilla")
                    .ignoreHttpErrors(true)
                    .timeout(SiteParsingProfile.CONNECTION_TIMEOUT_VALUE)
                    .get();
            if (doc.baseUri().contains("/?v=")) {
                // The search found the page directly
                String linkTitle = doc.title().replace(" - JAVLibrary", "");
                Element posterElement = doc.select("img#video_jacket_img").first();
                if (posterElement != null) {
                    String smallPosterUrl = toSmallPosterUrl(posterElement.attr("src"));
                    linksList.add(new SearchResult(doc.baseUri(), linkTitle, new Thumb(smallPosterUrl)));
                } else {
                    linksList.add(new SearchResult(doc.baseUri(), linkTitle));
                }
            } else {
                // The search didn't find an exact match and took us to the search results page.
                // Filter out anything that does not exactly match the ID from the search query.
                String encodedKeyword = searchString.replaceAll(".*\\?keyword=(.*)$", "$1");
                String searchId = new URLCodec().decode(encodedKeyword).toUpperCase();
                Elements videoElements = doc.select("div.video:has(div.id:matchesOwn(^" + Pattern.quote(searchId) + "$))");
                for (Element videoElement : videoElements) {
                    String relativeLink = videoElement.select("a").attr("href");
                    String linkLabel = videoElement.select("a").attr("title").trim();
                    String imageUrl = videoElement.select("img").attr("src");
                    if (relativeLink.length() > 1) {
                        // Drop the first character of the href before appending it to the site root.
                        String fullLink = websiteURLBegin + relativeLink.substring(1);
                        linksList.add(new SearchResult(fullLink, linkLabel, new Thumb(imageUrl)));
                    }
                }
            }
            return linksList.toArray(new SearchResult[linksList.size()]);
        } catch (IOException | DecoderException e) {
            // Best effort: a failed search is reported as "no results".
            e.printStackTrace();
        }
        return new SearchResult[0];
    }

    /**
     * Derives the tiny preview URL from the full-size jacket URL. The page does not
     * link the small version, but replacing everything from the last 'l' of the URL
     * with "t.jpg" yields it.
     *
     * @param posterUrl full-size jacket URL
     * @return the tiny preview URL, or {@code posterUrl} unchanged when it contains
     *         no 'l' to replace
     */
    private static String toSmallPosterUrl(String posterUrl) {
        int lastL = posterUrl.lastIndexOf('l');
        if (lastL < 0) {
            return posterUrl;
        }
        return posterUrl.substring(0, lastL) + "t.jpg";
    }

    /** @return an empty array; no extra fanart on this site is supported, for now */
    @Override
    public Thumb[] scrapeExtraFanart() {
        return new Thumb[0];
    }

    /** @return the display name "JavLibrary" */
    @Override
    public String toString() {
        return "JavLibrary";
    }

    /** @return a fresh profile created with the no-argument constructor */
    @Override
    public SiteParsingProfile newInstance() {
        return new JavLibraryParsingProfile();
    }

    /** @return the parser name "JAVLibrary" */
    @Override
    public String getParserName() {
        return "JAVLibrary";
    }
}