package moviescraper.doctord.controller.siteparsingprofile.specific;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FilenameUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import moviescraper.doctord.controller.siteparsingprofile.SiteParsingProfile;
import moviescraper.doctord.model.SearchResult;
import moviescraper.doctord.model.dataitem.Actor;
import moviescraper.doctord.model.dataitem.Director;
import moviescraper.doctord.model.dataitem.Genre;
import moviescraper.doctord.model.dataitem.ID;
import moviescraper.doctord.model.dataitem.MPAARating;
import moviescraper.doctord.model.dataitem.OriginalTitle;
import moviescraper.doctord.model.dataitem.Outline;
import moviescraper.doctord.model.dataitem.Plot;
import moviescraper.doctord.model.dataitem.Rating;
import moviescraper.doctord.model.dataitem.ReleaseDate;
import moviescraper.doctord.model.dataitem.Runtime;
import moviescraper.doctord.model.dataitem.Set;
import moviescraper.doctord.model.dataitem.SortTitle;
import moviescraper.doctord.model.dataitem.Studio;
import moviescraper.doctord.model.dataitem.Tagline;
import moviescraper.doctord.model.dataitem.Thumb;
import moviescraper.doctord.model.dataitem.Title;
import moviescraper.doctord.model.dataitem.Top250;
import moviescraper.doctord.model.dataitem.Votes;
import moviescraper.doctord.model.dataitem.Year;
public class TokyoHotParsingProfile extends SiteParsingProfile implements SpecificProfile {
private String searchString;
private Document docSite;
private Document docImage;
private String siteLink;
private String imageLink;
private String id;
private static final SimpleDateFormat tokyoHotReleaseDateFormat = new SimpleDateFormat("dd-MMM-yyyy hh:mm", Locale.ENGLISH);
public TokyoHotParsingProfile() {
}
@Override
public void setDocument(Document document) {
super.setDocument(document);
docSite = document;
docImage = SiteParsingProfile.downloadDocumentFromURLString(imageLink);
}
@Override
public Title scrapeTitle() {
Elements elements = docSite.select("div font[size=2]");
if ( elements.size() > 2 )
return new Title( elements.get(0).ownText().replace(""", "").replace("\"", "") );
return null;
}
@Override
public OriginalTitle scrapeOriginalTitle() {
return OriginalTitle.BLANK_ORIGINALTITLE;
}
@Override
public SortTitle scrapeSortTitle() {
return SortTitle.BLANK_SORTTITLE;
}
@Override
public Set scrapeSet() {
return new Set("Tokyo Hot");
}
@Override
public Rating scrapeRating() {
return new Rating(0,"");
}
@Override
public Year scrapeYear() {
return scrapeReleaseDate().getYear();
}
@Override
public ReleaseDate scrapeReleaseDate()
{
ReleaseDate releaseDate = ReleaseDate.BLANK_RELEASEDATE;
Elements releaseDateElements = docImage.select("td[align=right]");
for(Element currentElement : releaseDateElements)
{
if (releaseDateElements.size() > 2) {
Pattern pattern = Pattern.compile("[0-9]{4}");
String timecode = currentElement.ownText();
Matcher matcher = pattern.matcher(timecode);
if (matcher.find()) {
// the last element we find seems to be the most accurate
// date, but I'm not 100% sure what each of these dates
// represents
// since they seem to vary by a few days usually
releaseDate = new ReleaseDate(timecode,
tokyoHotReleaseDateFormat);
}
}
}
return releaseDate;
}
@Override
public Top250 scrapeTop250() {
return Top250.BLANK_TOP250;
}
@Override
public Votes scrapeVotes() {
return Votes.BLANK_VOTES;
}
@Override
public Outline scrapeOutline() {
return Outline.BLANK_OUTLINE;
}
@Override
public Plot scrapePlot() {
Elements elements = docSite.select("tr td[align=left]");
if (elements.size() > 0) {
String ownText = elements.get(0).childNode(0).childNode(0).toString().trim();
return new Plot(ownText);
}
return Plot.BLANK_PLOT;
}
@Override
public Tagline scrapeTagline() {
return Tagline.BLANK_TAGLINE;
}
@Override
public Runtime scrapeRuntime() {
Elements elements = docSite.select("td[align=center] font strong");
Pattern timePattern = Pattern.compile("[0-9]{2,3} min");
Pattern minPattern = Pattern.compile("[0-9]{2,3}");
String time = "";
for (Element element : elements) {
String node = element.childNode(0).toString();
Matcher matcher = timePattern.matcher(node);
if (matcher.find()) {
time = matcher.group();
Matcher minMatcher = minPattern.matcher(time);
if ( minMatcher.find() ) {
time = minMatcher.group();
}
}
}
return new Runtime(time);
}
@Override
public Thumb[] scrapePosters() {
try {
Thumb[] thumbs = new Thumb[1];
thumbs[0] = new Thumb(getImageLink(searchString) + "_v.jpg");
return thumbs;
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return new Thumb[0];
}
@Override
public Thumb[] scrapeFanart() {
try {
Thumb[] thumbs = new Thumb[1];
thumbs[0] = new Thumb(getImageLink(searchString) + "_vb.jpg", getImageLink(searchString) + "_v.jpg");
return thumbs;
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return new Thumb[0];
}
@Override
public Thumb[] scrapeExtraFanart() {
ArrayList<Thumb> extraFanart = new ArrayList<>();
return extraFanart.toArray(new Thumb[extraFanart.size()]);
}
@Override
public MPAARating scrapeMPAA() {
return MPAARating.RATING_XXX;
}
@Override
public ID scrapeID() {
return new ID(findIDTag(searchString).toUpperCase());
}
@Override
public ArrayList<Genre> scrapeGenres() {
Genre a = new Genre("uncensored");
Genre b = new Genre("No condom");
ArrayList<Genre> list = new ArrayList<>();
Collections.addAll(list, a, b);
return list;
}
@Override
public ArrayList<Actor> scrapeActors() {
ArrayList<Actor> list = new ArrayList<>();
Elements elements = docSite.select("div font[size=2]");
if ( elements.size() > 2 )
list.add( new Actor(elements.get(1).childNode(0).toString(), null, null) );
return list;
}
@Override
public ArrayList<Director> scrapeDirectors() {
Director a = new Director("Tokyo Hot", null);
ArrayList<Director> list = new ArrayList<>();
Collections.addAll(list, a);
return list;
}
@Override
public Studio scrapeStudio() {
return new Studio("Tokyo Hot");
}
@Override
public String createSearchString(File file) {
scrapedMovieFile = file;
String fileID = findIDTagFromFile(file).toLowerCase();
if ( fileID != null ) {
try {
Document doc = Jsoup.connect("http://cdn.www.tokyo-hot.com/igs/").userAgent("Mozilla").ignoreHttpErrors(true).timeout(SiteParsingProfile.CONNECTION_TIMEOUT_VALUE).get();
Elements select = doc.select("tr td a");
String foundLink = null;
for (Element element : select) {
String link = element.attr("href");
if ( link.startsWith( fileID ) ) {
foundLink = link;
break;
}
}
if ( foundLink == null ) {
System.out.println("Found no Link for TokyoHot");
return null;
}
id = foundLink.replace("/", "");
imageLink = getImageLink(id);
siteLink = getSiteLink(id);
return siteLink;
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
return null;
}
@Override
public SearchResult[] getSearchResults(String searchString)
throws IOException {
SearchResult searchResult = new SearchResult( searchString, searchString);
SearchResult[] sr = {searchResult};
return sr;
}
public static String findIDTagFromFile(File file) {
return findIDTag(FilenameUtils.getName(file.getName()));
}
public static String findIDTag(String fileName) {
Pattern pattern = Pattern.compile("n[0-9]{3,4}");
Matcher matcher = pattern.matcher(fileName);
if (matcher.find()) {
String searchString = matcher.group();
return searchString;
}
return null;
}
private String getImageLink(String searchString) {
return "http://cdn.www.tokyo-hot.com/igs/" + searchString + "/";
}
private String getSiteLink(String searchString) {
this.searchString = searchString;
return "http://cdn.www.tokyo-hot.com/e/" + searchString + "_e.html";
}
@Override
public String getParserName() {
return "Tokyo Hot";
}
@Override
public SiteParsingProfile newInstance() {
return new TokyoHotParsingProfile();
}
}