package moviescraper.doctord.controller.siteparsingprofile.specific;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import org.apache.commons.lang3.text.WordUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import moviescraper.doctord.controller.languagetranslation.Language;
import moviescraper.doctord.controller.languagetranslation.TranslateString;
import moviescraper.doctord.controller.siteparsingprofile.SiteParsingProfile;
import moviescraper.doctord.model.SearchResult;
import moviescraper.doctord.model.dataitem.Actor;
import moviescraper.doctord.model.dataitem.Director;
import moviescraper.doctord.model.dataitem.Genre;
import moviescraper.doctord.model.dataitem.ID;
import moviescraper.doctord.model.dataitem.MPAARating;
import moviescraper.doctord.model.dataitem.OriginalTitle;
import moviescraper.doctord.model.dataitem.Outline;
import moviescraper.doctord.model.dataitem.Plot;
import moviescraper.doctord.model.dataitem.Rating;
import moviescraper.doctord.model.dataitem.ReleaseDate;
import moviescraper.doctord.model.dataitem.Runtime;
import moviescraper.doctord.model.dataitem.Set;
import moviescraper.doctord.model.dataitem.SortTitle;
import moviescraper.doctord.model.dataitem.Studio;
import moviescraper.doctord.model.dataitem.Tagline;
import moviescraper.doctord.model.dataitem.Thumb;
import moviescraper.doctord.model.dataitem.Title;
import moviescraper.doctord.model.dataitem.Top250;
import moviescraper.doctord.model.dataitem.Votes;
import moviescraper.doctord.model.dataitem.Year;
public class OneThousandGiriParsingProfile extends SiteParsingProfile implements
SpecificProfile {
private String idSearchedForFromFileName;
Document japaneseDocument;
/**
* loads up the japanese version of this page into japaneseDocument
*/
private void initializeJapaneseDocument()
{
if(document != null && japaneseDocument == null && scrapingLanguage.equals(Language.ENGLISH))
{
String url = document.baseUri().replaceFirst("http://en", "http://www");
System.out.println("url = " + url);
japaneseDocument = SiteParsingProfile.downloadDocumentFromURLString(url);
}
else if(document != null && japaneseDocument == null)
japaneseDocument = document;
}
@Override
public Title scrapeTitle() {
if(pageIsValidMoviePage())
{
Element titleElement = document.select("head title").first();
if(scrapingLanguage.equals(Language.ENGLISH) && titleElement != null)
{
return new Title(WordUtils.capitalize(TranslateString
.translateStringJapaneseToEnglish(titleElement.text()
.replace("1000giri.com | ", ""))));
}
else if(scrapingLanguage.equals(Language.JAPANESE))
{
initializeJapaneseDocument();
return new Title(getJapaneseTitleText(japaneseDocument));
}
}
return new Title("");
}
/**
* Helper method to get the japanese title. Used for both the original title
* and the title if the scraper is scraping the page in Japanese
*/
private String getJapaneseTitleText(Document japaneseDocument)
{
Element titleElement = japaneseDocument.select("head title").first();
if(titleElement != null)
{
String titleElementText = titleElement.text();
if(titleElementText.contains("| "))
{
try{
titleElementText = titleElementText.substring(titleElementText.indexOf("| ")+2);
}
catch(ArrayIndexOutOfBoundsException e)
{
e.printStackTrace();
}
}
return titleElementText;
}
return "";
}
@Override
public OriginalTitle scrapeOriginalTitle() {
initializeJapaneseDocument();
return new OriginalTitle(getJapaneseTitleText(japaneseDocument));
}
@Override
public SortTitle scrapeSortTitle() {
//This is something the user sets themselves
return SortTitle.BLANK_SORTTITLE;
}
@Override
public Set scrapeSet() {
//No set information for this website
return Set.BLANK_SET;
}
@Override
public Rating scrapeRating() {
//No rating information on this website
return Rating.BLANK_RATING;
}
@Override
public Year scrapeYear() {
return scrapeReleaseDate().getYear();
}
@Override
public ReleaseDate scrapeReleaseDate()
{
initializeJapaneseDocument();
Element releaseDateElement = japaneseDocument.select("table.detail tbody tr th:contains(配信日) + td").first();
if(releaseDateElement != null && releaseDateElement.text().length() > 4)
{
return new ReleaseDate(releaseDateElement.text().trim());
}
return ReleaseDate.BLANK_RELEASEDATE;
}
@Override
public Top250 scrapeTop250() {
//No Top250 on this website
return Top250.BLANK_TOP250;
}
@Override
public Votes scrapeVotes() {
//No Votes on this website
return Votes.BLANK_VOTES;
}
@Override
public Outline scrapeOutline() {
//No outline on this website
return Outline.BLANK_OUTLINE;
}
@Override
public Plot scrapePlot() {
initializeJapaneseDocument();
Element plotElement = japaneseDocument.select("table.detail tbody tr td p").last();
if(plotElement != null && plotElement.text().length() > 0)
{
String plotText = plotElement.text();
if(scrapingLanguage.equals(Language.ENGLISH))
plotText = TranslateString.translateStringJapaneseToEnglish(plotText);
return new Plot(plotText);
}
return Plot.BLANK_PLOT;
}
@Override
public Tagline scrapeTagline() {
//No Tagline info on this site
return Tagline.BLANK_TAGLINE;
}
@Override
public Runtime scrapeRuntime() {
//no Runtime info on this site
return Runtime.BLANK_RUNTIME;
}
@Override
public Thumb[] scrapePosters() {
if (pageIsValidMoviePage()) {
Thumb swfPoster;
try {
swfPoster = new Thumb(baseSiteUrl() + "gallery/"
+ idSearchedForFromFileName + "/images/swf_f.jpg");
} catch (MalformedURLException e) {
e.printStackTrace();
return new Thumb[0];
}
Thumb[] posterArray = { swfPoster };
return posterArray;
}
return new Thumb[0];
}
@Override
public Thumb[] scrapeFanart() {
return new Thumb[0];
}
@Override
public Thumb[] scrapeExtraFanart() {
return new Thumb[0];
}
@Override
public MPAARating scrapeMPAA() {
return MPAARating.RATING_XXX;
}
@Override
public ID scrapeID() {
if (pageIsValidMoviePage())
return new ID(idSearchedForFromFileName);
else return ID.BLANK_ID;
}
@Override
public ArrayList<Genre> scrapeGenres() {
initializeJapaneseDocument();
Elements genreElements;
ArrayList<Genre> genreList = new ArrayList<>();
if(getScrapingLanguage().equals(Language.ENGLISH))
genreElements = document.select("table.detail tbody tr th:contains(Type) + td a, table.detail tbody tr th:contains(Genres) + td a");
else
genreElements = japaneseDocument.select("table.detail tbody tr th:contains(タイプ) + td a, table.detail tbody tr th:contains(ジャンル) + td a");
if(genreElements != null)
{
for(Element genre : genreElements)
{
if(genre.text().length() > 0)
{
if(!genre.text().equals("Exclusive video"))
{
Genre genreToAdd = new Genre(genre.text());
genreList.add(genreToAdd);
}
}
}
}
return genreList;
}
@Override
public ArrayList<Actor> scrapeActors() {
initializeJapaneseDocument();
Elements actorElements;
ArrayList<Actor> actorList = new ArrayList<>();
if(getScrapingLanguage().equals(Language.ENGLISH))
actorElements = document.select("table.detail tbody tr th:contains(Name) + td a");
else
actorElements = japaneseDocument.select("table.detail tbody tr th:contains(名前) + td a");
if(actorElements != null)
{
for(Element actor : actorElements)
{
if(actor.text().length() > 0)
{
Actor actorToAdd = new Actor(actor.text(),"",null);
actorList.add(actorToAdd);
}
}
}
return actorList;
}
@Override
public ArrayList<Director> scrapeDirectors() {
//No Director info on this site
return new ArrayList<>();
}
@Override
public Studio scrapeStudio() {
return new Studio("1000giri");
}
@Override
public String createSearchString(File file) {
scrapedMovieFile = file;
idSearchedForFromFileName = findIDTagFromFile(file, isFirstWordOfFileIsID());
return idSearchedForFromFileName;
}
@Override
public SearchResult[] getSearchResults(String searchString)
throws IOException {
if(idSearchedForFromFileName == null)
idSearchedForFromFileName = searchString;
if(searchString != null && searchString.length() > 0)
{
SearchResult [] searchResultArray =
{new SearchResult(baseSiteUrl() + "moviepages/" + searchString + "/index.html",
searchString,
new Thumb(baseSiteUrl() + "gallery/" + searchString + "/images/index_s.jpg"))};
//if no page for this ID, don't try to keep scraping by returning a URL to a page that doesn't exist
if(fileExistsAtURL(searchResultArray[0].getUrlPath()))
return searchResultArray;
}
return new SearchResult[0];
}
@Override
public SiteParsingProfile newInstance() {
return new OneThousandGiriParsingProfile();
}
@Override
public String getParserName() {
return "1000giri";
}
private String baseSiteUrl()
{
if(this.scrapingLanguage.equals(Language.ENGLISH))
return "http://en.1000giri.net/";
else return "http://www.1000giri.net/";
}
/**
* Used to make sure when we are scraping our document that we have not been
* redirected due to a 404 when scraping from our URL
* @return true if we are on a valid page to scrape from, false otherwise
*/
private boolean pageIsValidMoviePage()
{
if(document.baseUri().contains(idSearchedForFromFileName))
return true;
else return false;
}
}