package moviescraper.doctord.controller.siteparsingprofile.specific;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.codec.net.URLCodec;
import org.apache.commons.lang3.text.WordUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import moviescraper.doctord.controller.languagetranslation.JapaneseCharacter;
import moviescraper.doctord.controller.languagetranslation.Language;
import moviescraper.doctord.controller.languagetranslation.TranslateString;
import moviescraper.doctord.controller.siteparsingprofile.SiteParsingProfile;
import moviescraper.doctord.model.SearchResult;
import moviescraper.doctord.model.dataitem.Actor;
import moviescraper.doctord.model.dataitem.Director;
import moviescraper.doctord.model.dataitem.Genre;
import moviescraper.doctord.model.dataitem.ID;
import moviescraper.doctord.model.dataitem.MPAARating;
import moviescraper.doctord.model.dataitem.OriginalTitle;
import moviescraper.doctord.model.dataitem.Outline;
import moviescraper.doctord.model.dataitem.Plot;
import moviescraper.doctord.model.dataitem.Rating;
import moviescraper.doctord.model.dataitem.ReleaseDate;
import moviescraper.doctord.model.dataitem.Runtime;
import moviescraper.doctord.model.dataitem.Set;
import moviescraper.doctord.model.dataitem.SortTitle;
import moviescraper.doctord.model.dataitem.Studio;
import moviescraper.doctord.model.dataitem.Tagline;
import moviescraper.doctord.model.dataitem.Thumb;
import moviescraper.doctord.model.dataitem.Title;
import moviescraper.doctord.model.dataitem.Top250;
import moviescraper.doctord.model.dataitem.Votes;
import moviescraper.doctord.model.dataitem.Year;
public class JavBusParsingProfile extends SiteParsingProfile implements SpecificProfile {
public static final String urlLanguageEnglish = "en";
public static final String urlLanguageJapanese = "ja";
//JavBus divides movies into two categories - censored and uncensored.
//All censored movies need cropping of their poster
private boolean isCensoredSearch = true;
private Document japaneseDocument;
/**
 * Returns the scraper groups this profile belongs to.
 * <p>
 * The list is created on first use and stored in {@code groupNames}; when it is
 * created here it holds only {@code ScraperGroupName.JAV_CENSORED_SCRAPER_GROUP}.
 *
 * @return the cached list of scraper group names
 */
@Override
public List<ScraperGroupName> getScraperGroupNames() {
    // Reuse the list if a previous call (or other code) already populated it.
    if (groupNames != null) {
        return groupNames;
    }
    groupNames = Arrays.asList(ScraperGroupName.JAV_CENSORED_SCRAPER_GROUP);
    return groupNames;
}
/**
 * Lazily fills {@code japaneseDocument} with the Japanese-language version of the
 * page currently held in {@code document}.
 * <p>
 * If the current page URL contains an {@code /en/} path segment, the first such
 * segment is swapped for {@code /ja/} and that page is downloaded. Otherwise the
 * current document is reused as-is. Does nothing when the Japanese document is
 * already loaded or when there is no current document. If the download fails the
 * field stays {@code null}, so a later call will try again.
 */
private void initializeJapaneseDocument() {
    // Nothing to do if already initialized; nothing possible without a source page.
    if (japaneseDocument != null || document == null) {
        return;
    }

    String urlOfCurrentPage = document.location();
    if (urlOfCurrentPage == null || !urlOfCurrentPage.contains("/en/")) {
        // Not an English page URL, so the current document is used directly.
        japaneseDocument = document;
        return;
    }

    // Swap only the language segment so the rewrite works regardless of scheme or host.
    String japaneseUrl = urlOfCurrentPage.replaceFirst(Pattern.quote("/en/"), "/ja/");
    try {
        japaneseDocument = Jsoup.connect(japaneseUrl)
                .userAgent("Mozilla")
                .ignoreHttpErrors(true)
                .timeout(SiteParsingProfile.CONNECTION_TIMEOUT_VALUE)
                .get();
    } catch (IOException e) {
        // Best effort: leave japaneseDocument null so callers fall back to blank values.
        e.printStackTrace();
    }
}
/**
 * Scrapes the movie title from the page's {@code <title>} element.
 * <p>
 * The {@code "- JavBus"} suffix is removed, everything up to the first space
 * (the movie ID) is dropped, and surrounding whitespace is trimmed. When scraping
 * in English and the remaining text contains Japanese characters, it is passed
 * through the Japanese-to-English translator.
 *
 * @return the scraped title, or an empty title when the page has no {@code <title>} element
 */
@Override
public Title scrapeTitle() {
    Element titleElement = document.select("title").first();
    if (titleElement == null) {
        return new Title("");
    }

    String titleText = titleElement.text().replace("- JavBus", "");

    // Remove the ID from the front of the title.
    int firstSpace = titleText.indexOf(" ");
    if (firstSpace >= 0) {
        titleText = titleText.substring(firstSpace);
    }
    // Drop the separator space left after the ID and the one left before the removed suffix.
    titleText = titleText.trim();

    // Translate the title if it is still in Japanese but English was requested.
    if (scrapingLanguage == Language.ENGLISH && JapaneseCharacter.containsJapaneseLetter(titleText)) {
        titleText = TranslateString.translateStringJapaneseToEnglish(titleText);
    }
    return new Title(titleText);
}
/**
 * Scrapes the untranslated title from the Japanese version of the page.
 * <p>
 * The {@code "- JavBus"} suffix is removed, everything up to the first space
 * (the movie ID) is dropped, and surrounding whitespace is trimmed.
 *
 * @return the original title, or {@code OriginalTitle.BLANK_ORIGINALTITLE} when the
 *         Japanese page could not be loaded or has no {@code <title>} element
 */
@Override
public OriginalTitle scrapeOriginalTitle() {
    initializeJapaneseDocument();
    if (japaneseDocument == null) {
        return OriginalTitle.BLANK_ORIGINALTITLE;
    }

    Element titleElement = japaneseDocument.select("title").first();
    if (titleElement == null) {
        return OriginalTitle.BLANK_ORIGINALTITLE;
    }

    String titleText = titleElement.text().replace("- JavBus", "");

    // Remove the ID from the front of the title.
    int firstSpace = titleText.indexOf(" ");
    if (firstSpace >= 0) {
        titleText = titleText.substring(firstSpace);
    }
    // Drop the separator space left after the ID and the one left before the removed suffix.
    return new OriginalTitle(titleText.trim());
}
/**
 * This profile does not scrape a sort title.
 *
 * @return always {@code SortTitle.BLANK_SORTTITLE}
 */
@Override
public SortTitle scrapeSortTitle() {
    return SortTitle.BLANK_SORTTITLE;
}
/**
 * Scrapes the series (set) the movie belongs to.
 * <p>
 * Looks for the first link following the header span labelled with the series
 * word for the current scraping language. When scraping in English and the link
 * text contains Japanese characters, the text is translated to English.
 *
 * @return the scraped set, or {@code Set.BLANK_SET} when no non-empty series link is found
 */
@Override
public Set scrapeSet() {
    String seriesLabel;
    if (scrapingLanguage == Language.ENGLISH) {
        seriesLabel = "Series:";
    } else {
        seriesLabel = "シリーズ:";
    }

    Element seriesLink = document.select("span.header:containsOwn(" + seriesLabel + ") ~ a").first();
    if (seriesLink == null) {
        return Set.BLANK_SET;
    }

    String seriesName = seriesLink.text();
    if (seriesName.length() == 0) {
        return Set.BLANK_SET;
    }

    boolean needsTranslation = scrapingLanguage == Language.ENGLISH
            && JapaneseCharacter.containsJapaneseLetter(seriesName);
    if (needsTranslation) {
        seriesName = TranslateString.translateStringJapaneseToEnglish(seriesName);
    }
    return new Set(seriesName);
}
/**
 * This profile does not scrape a rating.
 *
 * @return always {@code Rating.BLANK_RATING}
 */
@Override
public Rating scrapeRating() {
    return Rating.BLANK_RATING;
}
/**
 * Derives the year from the scraped release date.
 *
 * @return the year reported by the result of {@link #scrapeReleaseDate()}
 */
@Override
public Year scrapeYear() {
    ReleaseDate releaseDate = scrapeReleaseDate();
    return releaseDate.getYear();
}
/**
 * Scrapes the release date from the first paragraph containing the release-date
 * label for the current scraping language.
 * <p>
 * Only the paragraph's own text (excluding child elements) is used, and it must be
 * longer than four characters after trimming to be accepted.
 *
 * @return the scraped release date, or {@code ReleaseDate.BLANK_RELEASEDATE} when
 *         no suitable paragraph is found
 */
@Override
public ReleaseDate scrapeReleaseDate() {
    String dateLabel;
    if (scrapingLanguage == Language.ENGLISH) {
        dateLabel = "Release Date:";
    } else {
        dateLabel = "発売日:";
    }

    Element dateParagraph = document.select("p:contains(" + dateLabel + ")").first();
    if (dateParagraph == null) {
        return ReleaseDate.BLANK_RELEASEDATE;
    }

    String dateText = dateParagraph.ownText().trim();
    if (dateText.length() <= 4) {
        return ReleaseDate.BLANK_RELEASEDATE;
    }
    return new ReleaseDate(dateText);
}
/**
 * This profile does not scrape a top-250 ranking.
 *
 * @return always {@code Top250.BLANK_TOP250}
 */
@Override
public Top250 scrapeTop250() {
    return Top250.BLANK_TOP250;
}
/**
 * This profile does not scrape a vote count.
 *
 * @return always {@code Votes.BLANK_VOTES}
 */
@Override
public Votes scrapeVotes() {
    return Votes.BLANK_VOTES;
}
/**
 * This profile does not scrape an outline.
 *
 * @return always {@code Outline.BLANK_OUTLINE}
 */
@Override
public Outline scrapeOutline() {
    return Outline.BLANK_OUTLINE;
}
/**
 * This profile does not scrape a plot.
 *
 * @return always {@code Plot.BLANK_PLOT}
 */
@Override
public Plot scrapePlot() {
    return Plot.BLANK_PLOT;
}
/**
 * This profile does not scrape a tagline.
 *
 * @return always {@code Tagline.BLANK_TAGLINE}
 */
@Override
public Tagline scrapeTagline() {
    return Tagline.BLANK_TAGLINE;
}
/**
 * Scrapes the running time from the first paragraph containing the length label
 * for the current scraping language.
 * <p>
 * The unit suffix ({@code "min"} on the English page, {@code "分"} on the Japanese
 * page) is stripped from the paragraph's own text and the remainder is trimmed.
 *
 * @return the scraped runtime, or {@code Runtime.BLANK_RUNTIME} when the paragraph
 *         is missing or holds no value
 */
@Override
public Runtime scrapeRuntime() {
    String lengthWord = (scrapingLanguage == Language.ENGLISH) ? "Length:" : "収録時間:";
    Element lengthElement = document.select("p:contains(" + lengthWord + ")").first();
    if (lengthElement == null) {
        return Runtime.BLANK_RUNTIME;
    }

    // Getting rid of the word "min" in both Japanese and English, then any space it leaves behind.
    String runtimeText = lengthElement.ownText()
            .replace("min", "")
            .replace("分", "")
            .trim();
    if (runtimeText.isEmpty()) {
        return Runtime.BLANK_RUNTIME;
    }
    return new Runtime(runtimeText);
}
/**
 * Scrapes the poster image by delegating to {@code scrapePostersAndFanart} in poster mode.
 *
 * @return the thumbs produced by {@code scrapePostersAndFanart(true)}
 */
@Override
public Thumb[] scrapePosters() {
    final boolean posterScrape = true;
    return scrapePostersAndFanart(posterScrape);
}
/**
 * Scrapes the fanart image by delegating to {@code scrapePostersAndFanart} in fanart mode.
 *
 * @return the thumbs produced by {@code scrapePostersAndFanart(false)}
 */
@Override
public Thumb[] scrapeFanart() {
    final boolean posterScrape = false;
    return scrapePostersAndFanart(posterScrape);
}
/**
 * Builds a single-element thumb array from the page's {@code a.bigImage} link.
 * <p>
 * The second {@code Thumb} constructor argument is {@code true} only for a poster
 * scrape during a censored search; presumably it requests cropping of the image
 * (NOTE(review): confirm against the {@code Thumb} constructor).
 *
 * @param isPosterScrape {@code true} when scraping the poster, {@code false} for fanart
 * @return an array holding the one image, or an empty array when the link is
 *         missing or the thumb cannot be created
 */
private Thumb[] scrapePostersAndFanart(boolean isPosterScrape) {
    Element bigImageLink = document.select("a.bigImage").first();
    if (bigImageLink == null) {
        return new Thumb[0];
    }

    boolean thumbFlag = isCensoredSearch && isPosterScrape;
    try {
        Thumb image = new Thumb(bigImageLink.attr("href"), thumbFlag);
        return new Thumb[] { image };
    } catch (IOException e) {
        e.printStackTrace();
        return new Thumb[0];
    }
}
/**
 * Scrapes the sample images linked from the page's {@code div.sample-box} list.
 * <p>
 * Links whose {@code href} cannot be turned into a {@code Thumb} are skipped, so
 * the returned array never contains {@code null} entries.
 *
 * @return one thumb per usable sample link, in page order; empty when there are none
 */
@Override
public Thumb[] scrapeExtraFanart() {
    Elements extraFanartElements = document.select("div.sample-box ul li a");
    List<Thumb> extraFanart = new ArrayList<>(extraFanartElements.size());
    for (Element extraFanartElement : extraFanartElements) {
        String href = extraFanartElement.attr("href");
        try {
            extraFanart.add(new Thumb(href));
        } catch (MalformedURLException e) {
            // Skip the unusable link instead of leaving a null hole in the result.
            e.printStackTrace();
        }
    }
    return extraFanart.toArray(new Thumb[extraFanart.size()]);
}
/**
 * Reports a fixed rating; nothing is read from the page.
 *
 * @return a new {@code MPAARating} built from the string {@code "XXX"}
 */
@Override
public MPAARating scrapeMPAA() {
    return new MPAARating("XXX");
}
/**
 * Scrapes the movie ID from the first element matching either
 * {@code span.movie-code} or the span directly after the {@code "ID:"} header span.
 *
 * @return the scraped ID, or {@code ID.BLANK_ID} when no matching element exists
 */
@Override
public ID scrapeID() {
    Element idSpan = document.select("span.movie-code, span.header:containsOwn(ID:) + span").first();
    if (idSpan == null) {
        return ID.BLANK_ID;
    }
    return new ID(idSpan.text());
}
/**
 * Scrapes the genres from the links inside {@code span.genre} whose {@code href}
 * contains {@code /genre/}.
 * <p>
 * Empty link texts are skipped. When scraping in English, genre names that still
 * contain Japanese characters are translated. Each name is capitalized with
 * {@code WordUtils.capitalize} before being added.
 *
 * @return the list of scraped genres, empty when none are found
 */
@Override
public ArrayList<Genre> scrapeGenres() {
    ArrayList<Genre> genreList = new ArrayList<>();
    // The attribute selector must be closed with ']' to be a well-formed jsoup query.
    Elements genreElements = document.select("span.genre a[href*=/genre/]");
    for (Element genreElement : genreElements) {
        String genreText = genreElement.text();
        if (genreText.length() == 0) {
            continue;
        }
        // Some genre elements are untranslated, even on the English site, so we need to do it ourselves.
        if (scrapingLanguage == Language.ENGLISH && JapaneseCharacter.containsJapaneseLetter(genreText)) {
            genreText = TranslateString.translateStringJapaneseToEnglish(genreText);
        }
        genreList.add(new Genre(WordUtils.capitalize(genreText)));
    }
    return genreList;
}
/**
 * Scrapes the actors from the images inside {@code div.star-box}.
 * <p>
 * The actor name comes from each image's {@code title} attribute. When scraping in
 * English, names still containing Japanese characters are converted to romaji. The
 * image's {@code src} is attached as the actor's thumb unless it contains
 * {@code "printing.gif"}, {@code fileExistsAtURL} rejects it, or the thumb cannot
 * be created; in those cases the actor is added without a thumb.
 *
 * @return the list of scraped actors, empty when none are found
 */
@Override
public ArrayList<Actor> scrapeActors() {
    ArrayList<Actor> actors = new ArrayList<>();
    Elements actorImages = document.select("div.star-box li a img");
    for (Element actorImageElement : actorImages) {
        String name = actorImageElement.attr("title");
        // Sometimes the English page still has the name in Japanese, so convert it here.
        if (scrapingLanguage == Language.ENGLISH && JapaneseCharacter.containsJapaneseLetter(name)) {
            name = TranslateString.translateJapanesePersonNameToRomaji(name);
        }

        String imageUrl = actorImageElement.attr("src");
        boolean imageUsable = imageUrl != null
                && !imageUrl.contains("printing.gif")
                && fileExistsAtURL(imageUrl);

        Thumb actorThumb = null;
        if (imageUsable) {
            try {
                actorThumb = new Thumb(imageUrl);
            } catch (MalformedURLException e) {
                // Fall through and add the actor without a thumb.
                e.printStackTrace();
            }
        }
        actors.add(new Actor(name, null, actorThumb));
    }
    return actors;
}
/**
 * Scrapes the director from the first link following the header span labelled
 * with the director word for the current scraping language.
 *
 * @return a list holding the single scraped director, or an empty list when no
 *         non-empty director link is found
 */
@Override
public ArrayList<Director> scrapeDirectors() {
    ArrayList<Director> directorList = new ArrayList<>();
    String directorWord = (scrapingLanguage == Language.ENGLISH) ? "Director:" : "監督:";
    Element directorElement = document.select("span.header:containsOwn(" + directorWord + ") ~ a").first();
    if (directorElement != null) {
        String directorName = directorElement.text();
        if (directorName.length() > 0) {
            directorList.add(new Director(directorName, null));
        }
    }
    return directorList;
}
/**
 * Scrapes the studio from the first link following the header span labelled with
 * the studio word for the current scraping language.
 *
 * @return the scraped studio, or {@code Studio.BLANK_STUDIO} when no non-empty
 *         studio link is found
 */
@Override
public Studio scrapeStudio() {
    String studioLabel;
    if (scrapingLanguage == Language.ENGLISH) {
        studioLabel = "Studio:";
    } else {
        studioLabel = "メーカー:";
    }

    Element studioLink = document.select("span.header:containsOwn(" + studioLabel + ") ~ a").first();
    if (studioLink == null || studioLink.text().length() == 0) {
        return Studio.BLANK_STUDIO;
    }
    return new Studio(studioLink.text());
}
/**
 * Builds the JavBus search URL for the given movie file.
 * <p>
 * Stores the file in {@code scrapedMovieFile}, obtains its ID tag via
 * {@code findIDTagFromFile}, URL-encodes it and appends it to the search path for
 * the current URL language.
 *
 * @param file the movie file to search for
 * @return the search URL, or {@code null} when building it throws an exception
 */
@Override
public String createSearchString(File file) {
    scrapedMovieFile = file;
    String idTag = findIDTagFromFile(file, isFirstWordOfFileIsID());
    URLCodec urlCodec = new URLCodec();
    try {
        String encodedIdTag = urlCodec.encode(idTag);
        return "http://www.javbus.com/" + getUrlLanguageToUse() + "/search/" + encodedIdTag;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Picks the language path segment used in JavBus URLs.
 *
 * @return {@code urlLanguageEnglish} when scraping in English, otherwise
 *         {@code urlLanguageJapanese}
 */
private String getUrlLanguageToUse() {
    if (scrapingLanguage == Language.ENGLISH) {
        return urlLanguageEnglish;
    }
    return urlLanguageJapanese;
}
/**
 * Runs the search and converts each {@code div.item} on the result page into a
 * {@code SearchResult}.
 * <p>
 * The given URL is requested first. Only when that page has no result items is the
 * search repeated under {@code /uncensored/search/}. {@code isCensoredSearch}
 * records which of the two searches supplied the results.
 *
 * @param searchString the search URL to request
 * @return the search results in page order; an empty array when nothing is found
 *         or an I/O error occurs while searching
 * @throws IOException declared for interface compatibility; I/O errors raised in
 *         this method are caught and reported as an empty result
 */
@Override
public SearchResult[] getSearchResults(String searchString)
        throws IOException {
    ArrayList<SearchResult> linksList = new ArrayList<>();
    try {
        // Start every search as censored so an earlier fallback cannot leak into this one.
        isCensoredSearch = true;
        Document doc = Jsoup.connect(searchString)
                .userAgent("Mozilla")
                .ignoreHttpErrors(true)
                .timeout(SiteParsingProfile.CONNECTION_TIMEOUT_VALUE)
                .get();
        Elements videoLinksElements = doc.select("div.item");

        if (videoLinksElements == null || videoLinksElements.size() == 0) {
            // Nothing found in the censored section: retry once in the uncensored section.
            searchString = searchString.replace("/search/", "/uncensored/search/");
            isCensoredSearch = false;
            doc = Jsoup.connect(searchString)
                    .userAgent("Mozilla")
                    .ignoreHttpErrors(true)
                    .timeout(SiteParsingProfile.CONNECTION_TIMEOUT_VALUE)
                    .get();
            videoLinksElements = doc.select("div.item");
        }

        if (videoLinksElements != null) {
            for (Element videoLink : videoLinksElements) {
                String currentLink = videoLink.select("a").attr("href");
                String currentLinkLabel = videoLink.select("a").text().trim();
                String currentLinkImage = videoLink.select("img").attr("src");
                // Ignore items that carry no usable link.
                if (currentLink.length() > 1) {
                    linksList.add(new SearchResult(currentLink, currentLinkLabel, new Thumb(currentLinkImage)));
                }
            }
        }
        return linksList.toArray(new SearchResult[linksList.size()]);
    } catch (IOException e) {
        e.printStackTrace();
        return new SearchResult[0];
    }
}
/**
 * Creates a fresh, independent instance of this parsing profile.
 *
 * @return a new {@code JavBusParsingProfile}
 */
@Override
public SiteParsingProfile newInstance() {
    SiteParsingProfile freshProfile = new JavBusParsingProfile();
    return freshProfile;
}
/**
 * Returns the name of this parser.
 *
 * @return the string {@code "JavBus"}
 */
@Override
public String getParserName() {
    final String parserName = "JavBus";
    return parserName;
}
}