package moviescraper.doctord.controller.siteparsingprofile.specific;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.text.WordUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import moviescraper.doctord.controller.languagetranslation.Language;
import moviescraper.doctord.controller.siteparsingprofile.SiteParsingProfile;
import moviescraper.doctord.model.SearchResult;
import moviescraper.doctord.model.dataitem.Actor;
import moviescraper.doctord.model.dataitem.Director;
import moviescraper.doctord.model.dataitem.Genre;
import moviescraper.doctord.model.dataitem.ID;
import moviescraper.doctord.model.dataitem.MPAARating;
import moviescraper.doctord.model.dataitem.OriginalTitle;
import moviescraper.doctord.model.dataitem.Outline;
import moviescraper.doctord.model.dataitem.Plot;
import moviescraper.doctord.model.dataitem.Rating;
import moviescraper.doctord.model.dataitem.ReleaseDate;
import moviescraper.doctord.model.dataitem.Runtime;
import moviescraper.doctord.model.dataitem.Set;
import moviescraper.doctord.model.dataitem.SortTitle;
import moviescraper.doctord.model.dataitem.Studio;
import moviescraper.doctord.model.dataitem.Tagline;
import moviescraper.doctord.model.dataitem.Thumb;
import moviescraper.doctord.model.dataitem.Title;
import moviescraper.doctord.model.dataitem.Top250;
import moviescraper.doctord.model.dataitem.Trailer;
import moviescraper.doctord.model.dataitem.Votes;
import moviescraper.doctord.model.dataitem.Year;
public class MyTokyoHotParsingProfile extends SiteParsingProfile implements SpecificProfile {
private boolean scrapeInEnglish = true;
Document japaneseDocument;
private static final SimpleDateFormat myTokyoHotReleaseDateFormat = new SimpleDateFormat("yyyy/MM/dd", Locale.ENGLISH);
@Override
public String getParserName() {
return "My Tokyo-Hot";
}
/**
* loads up the japanese version of this page into japaneseDocument
*/
private void initializeJapaneseDocument()
{
if(document != null && japaneseDocument == null)
{
String url = document.baseUri().replaceFirst(Pattern.quote("lang=en"), Pattern.quote("lang=ja"));
japaneseDocument = SiteParsingProfile.downloadDocumentFromURLString(url);
}
}
@Override
public Title scrapeTitle() {
Element titleElement = null;
if(getScrapingLanguage() == Language.ENGLISH)
titleElement = document.select("div.pagetitle").first();
else if(getScrapingLanguage() == Language.JAPANESE)
{
initializeJapaneseDocument();
titleElement = japaneseDocument.select("div.pagetitle").first();
}
if(titleElement != null)
{
return new Title(titleElement.text().trim());
}
return new Title("");
}
@Override
public OriginalTitle scrapeOriginalTitle() {
initializeJapaneseDocument();
Element originalTitleElement = japaneseDocument.select("div.pagetitle").first();
if(originalTitleElement != null)
{
return new OriginalTitle(originalTitleElement.text().trim());
}
return OriginalTitle.BLANK_ORIGINALTITLE;
}
@Override
public SortTitle scrapeSortTitle() {
return SortTitle.BLANK_SORTTITLE;
}
@Override
public Set scrapeSet() {
return Set.BLANK_SET;
}
@Override
public Rating scrapeRating() {
return new Rating(10, "");
}
@Override
public Year scrapeYear() {
return scrapeReleaseDate().getYear();
}
@Override
public ReleaseDate scrapeReleaseDate() {
Element releaseDateElement = document.select("dl.info dt:contains(Release Date) + dd, dl.info dt:contains(�?信開始日) + dd").first();
if(releaseDateElement != null && releaseDateElement.text().length() > 4)
{
String releaseDateText = releaseDateElement.text().trim();
return new ReleaseDate(releaseDateText, myTokyoHotReleaseDateFormat);
}
return ReleaseDate.BLANK_RELEASEDATE;
}
@Override
public Top250 scrapeTop250() {
return Top250.BLANK_TOP250;
}
@Override
public Votes scrapeVotes() {
return Votes.BLANK_VOTES;
}
@Override
public Outline scrapeOutline() {
return Outline.BLANK_OUTLINE;
}
@Override
public Plot scrapePlot() {
Element plotElement = null;
if(getScrapingLanguage() == Language.ENGLISH)
plotElement = document.select("div.contents div.sentence").first();
else if(getScrapingLanguage() == Language.JAPANESE)
{
initializeJapaneseDocument();
plotElement = japaneseDocument.select("div.contents div.sentence").first();
}
if(plotElement != null)
return new Plot(plotElement.text().trim());
else return Plot.BLANK_PLOT;
}
@Override
public Tagline scrapeTagline() {
return Tagline.BLANK_TAGLINE;
}
@Override
public Runtime scrapeRuntime() {
Element durationElement = document.select("dl.info dt:contains(Duration) + dd, dl.info dt:contains(�?�録時間) + dd").first();
if(durationElement != null && durationElement.text().trim().length() > 0)
{
String [] durationSplitByTimeUnit = durationElement.text().split(":");
if(durationSplitByTimeUnit.length == 3)
{
int hours = Integer.parseInt(durationSplitByTimeUnit[0]);
int minutes = Integer.parseInt(durationSplitByTimeUnit[1]);
//we don't care about seconds
int totalMinutes = (hours * 60) + minutes;
return new Runtime(new Integer(totalMinutes).toString());
}
}
return Runtime.BLANK_RUNTIME;
}
@Override
public Thumb[] scrapePosters() {
ArrayList<Thumb> posters = new ArrayList<>();
Element trailerBackgroundImage = document.select("video[poster]").first();
if(trailerBackgroundImage != null)
{
String posterAttr = trailerBackgroundImage.attr("poster");
try {
//TODO: crop this poster
posters.add(new Thumb(posterAttr));
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
Elements photoGalleryImgs = document.select("div.scap a");
for(Element galleryImg : photoGalleryImgs)
{
String imgLink = galleryImg.attr("abs:href");
Element thumbnailLink = galleryImg.select("img").first();
try {
Thumb imgThumb = new Thumb(imgLink);
if(thumbnailLink != null)
imgThumb.setPreviewURL(new URL(thumbnailLink.attr("src")));
posters.add(imgThumb);
} catch (MalformedURLException e) {
e.printStackTrace();
}
}
return posters.toArray(new Thumb[posters.size()]);
}
@Override
public Trailer scrapeTrailer(){
//ArrayList<Thumb> posters = new ArrayList<Thumb>();
Element trailer = document.select("video source[src*=/samples/]").first();
if(trailer != null)
{
String trailerSrc = trailer.attr("src");
return new Trailer(trailerSrc);
}
return Trailer.BLANK_TRAILER;
}
@Override
public Thumb[] scrapeFanart() {
return scrapePosters();
}
@Override
public Thumb[] scrapeExtraFanart() {
return scrapePosters();
}
@Override
public MPAARating scrapeMPAA() {
return MPAARating.RATING_XXX;
}
@Override
public ID scrapeID() {
Element idElement = document.select("dl.info dt:contains(Product ID) + dd, dl.info dt:contains(作�?番�?�) + dd").first();
if(idElement != null && idElement.text().length() > 0)
return new ID(idElement.text().trim());
else return new ID("");
}
@Override
public ArrayList<Genre> scrapeGenres() {
ArrayList<Genre> genreList = new ArrayList<>();
Elements genreElements = null;
if(getScrapingLanguage() == Language.ENGLISH)
genreElements = document.select("dl.info dt:contains(Category) + dd a, dl.info dt:contains(カテゴリ) + dd a");
else if(getScrapingLanguage() == Language.JAPANESE)
{
initializeJapaneseDocument();
genreElements = japaneseDocument.select("dl.info dt:contains(Category) + dd a, dl.info dt:contains(カテゴリ) + dd a");
}
if(genreElements != null)
{
for(Element currentGenre : genreElements)
{
genreList.add(new Genre(WordUtils.capitalize(currentGenre.text().trim())));
}
}
return genreList;
}
@Override
public ArrayList<Actor> scrapeActors() {
ArrayList<Actor> actorList = new ArrayList<>();
Elements actressElements = null;
if(getScrapingLanguage() == Language.ENGLISH)
actressElements = document.select("dl.info dt:contains(Actress) + dd a, dl.info dt:contains(出演者) + dd a");
else if(getScrapingLanguage() == Language.JAPANESE)
{
initializeJapaneseDocument();
actressElements = japaneseDocument.select("dl.info dt:contains(Actress) + dd a, dl.info dt:contains(出演者) + dd a");
}
if(actressElements != null)
{
for(Element currentActress : actressElements)
{
String name = currentActress.text();
String href = currentActress.attr("href");
href = href.replaceAll(Pattern.quote("/cast/"), "");
href = href.replaceAll("/", "");
//now href is just the numerical number of this actor
String thumbnailLink = "http://my.cdn.tokyo-hot.com/media/cast/" + href + "/thumbnail.jpg";
if(SiteParsingProfile.fileExistsAtURL(thumbnailLink))
{
try {
actorList.add(new Actor(name, "", new Thumb(thumbnailLink)));
} catch (MalformedURLException e) {
e.printStackTrace();
actorList.add(new Actor(name, "", null));
}
}
else
{
actorList.add(new Actor(name, "", null));
}
}
}
return actorList;
}
@Override
public ArrayList<Director> scrapeDirectors() {
return new ArrayList<>();
}
@Override
public Studio scrapeStudio() {
return new Studio("Tokyo-Hot");
}
@Override
public String createSearchString(File file) {
scrapedMovieFile = file;
String fileID = findIDTagFromFile(file);
if(fileID == null)
return null;
fileID = fileID.toLowerCase();
if(fileID == null)
return null;
String searchURL = "http://my.tokyo-hot.com/product/?q=" + fileID + "&x=0&y=0";
return searchURL;
}
@Override
public SearchResult[] getSearchResults(String searchString)
throws IOException {
if(searchString == null)
return new SearchResult[0];
Document doc = Jsoup.connect(searchString).userAgent("Mozilla").ignoreHttpErrors(true).timeout(SiteParsingProfile.CONNECTION_TIMEOUT_VALUE).get();
Elements movieElements = doc.select("ul.list.slider.cf li.detail");
SearchResult[] searchResults = new SearchResult[movieElements.size()];
int indexNum = 0;
final String languageSuffixEnglish = "?lang=en";
final String languageSuffixJapanese = "?lang=ja";
String languageSuffixToUse = "";
if(scrapeInEnglish)
languageSuffixToUse = languageSuffixEnglish;
else
languageSuffixToUse = languageSuffixJapanese;
if(scrapeInEnglish)
for(Element movie : movieElements)
{
SearchResult currentSearchResult = null;
String urlPath = movie.select("a.rm").attr("abs:href") + languageSuffixToUse;
String label = movie.select("div.title").first().text();
String imageSrc = movie.select("a.rm img").attr("src");
if(imageSrc != null && imageSrc.length() > 0)
{
currentSearchResult = new SearchResult(urlPath, label, new Thumb(imageSrc));
}
else
{
currentSearchResult = new SearchResult(urlPath, label);
}
searchResults[indexNum] = currentSearchResult;
indexNum++;
}
return searchResults;
}
public static String findIDTagFromFile(File file) {
return findIDTag(FilenameUtils.getName(file.getName()));
}
public static String findIDTag(String fileName) {
Pattern pattern = Pattern.compile("[kn][0-9]{3,4}");
Matcher matcher = pattern.matcher(fileName);
if (matcher.find()) {
String searchString = matcher.group();
return searchString;
}
return null;
}
@Override
public SiteParsingProfile newInstance() {
return new MyTokyoHotParsingProfile();
}
}