package moviescraper.doctord.controller.siteparsingprofile.specific; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Locale; import java.util.LinkedList; import moviescraper.doctord.controller.siteparsingprofile.SiteParsingProfile; import moviescraper.doctord.model.SearchResult; import moviescraper.doctord.model.dataitem.Actor; import moviescraper.doctord.model.dataitem.Director; import moviescraper.doctord.model.dataitem.Genre; import moviescraper.doctord.model.dataitem.ID; import moviescraper.doctord.model.dataitem.MPAARating; import moviescraper.doctord.model.dataitem.OriginalTitle; import moviescraper.doctord.model.dataitem.Outline; import moviescraper.doctord.model.dataitem.Plot; import moviescraper.doctord.model.dataitem.Rating; import moviescraper.doctord.model.dataitem.ReleaseDate; import moviescraper.doctord.model.dataitem.Runtime; import moviescraper.doctord.model.dataitem.Set; import moviescraper.doctord.model.dataitem.SortTitle; import moviescraper.doctord.model.dataitem.Studio; import moviescraper.doctord.model.dataitem.Tagline; import moviescraper.doctord.model.dataitem.Thumb; import moviescraper.doctord.model.dataitem.Title; import moviescraper.doctord.model.dataitem.Top250; import moviescraper.doctord.model.dataitem.Votes; import moviescraper.doctord.model.dataitem.Year; import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class SquarePlusParsingProfile extends SiteParsingProfile implements SpecificProfile { private static final SimpleDateFormat squarePlusReleaseDateFormat = new SimpleDateFormat("MMM dd, yyyy", Locale.ENGLISH); @Override public List<ScraperGroupName> getScraperGroupNames() { if(groupNames == null) groupNames = Arrays.asList(ScraperGroupName.JAV_CENSORED_SCRAPER_GROUP); return groupNames; } public SquarePlusParsingProfile(Document document) { super(document); } public SquarePlusParsingProfile() { // TODO Auto-generated constructor stub } @Override public Title scrapeTitle() { Element titleElement = document .select("div.product-name.page-title h1") .first(); //remove the ID number off the end of the title, if it exists if(titleElement != null) { String titleElementText = titleElement.text().trim(); if(titleElementText.contains("(")) { titleElementText = titleElementText.substring(0, StringUtils.lastIndexOf(titleElementText,"(")); } return new Title(titleElementText); } //this shouldn't really ever happen... else return new Title(""); } @Override public OriginalTitle scrapeOriginalTitle() { //Does not have original japanese title, so don't return anything return OriginalTitle.BLANK_ORIGINALTITLE; } @Override public SortTitle scrapeSortTitle() { // we don't need any special sort title - that's usually something the // user provides return SortTitle.BLANK_SORTTITLE; } @Override public Set scrapeSet() { Element setElement = document.select("th.label:containsOwn(Series) ~ td").first(); if (setElement != null) return new Set(setElement.text()); return Set.BLANK_SET; } @Override public Rating scrapeRating() { //site doesn't have a rating return Rating.BLANK_RATING; } @Override public Year scrapeYear() { return scrapeReleaseDate().getYear(); } @Override public ReleaseDate scrapeReleaseDate() { Element releaseDateElement = document.select("th.label:containsOwn(Release date) ~ td").first(); if(releaseDateElement != null && releaseDateElement.text().length() > 4) { String releaseDateText = releaseDateElement.text().trim(); return new ReleaseDate(releaseDateText, squarePlusReleaseDateFormat); } return ReleaseDate.BLANK_RELEASEDATE; } @Override public Top250 scrapeTop250() { // This type of info doesn't exist on ActionJav return Top250.BLANK_TOP250; } @Override public Votes scrapeVotes() { return Votes.BLANK_VOTES; } @Override public Outline scrapeOutline() { return Outline.BLANK_OUTLINE; } @Override public Plot scrapePlot() { return Plot.BLANK_PLOT; } @Override public Tagline scrapeTagline() { return Tagline.BLANK_TAGLINE; } @Override public Runtime scrapeRuntime() { Element runtimeElement = document.select("th.label:containsOwn(Play Time) ~ td").first(); if(runtimeElement != null) return new Runtime(runtimeElement.text()); else return Runtime.BLANK_RUNTIME; } @Override public Thumb[] scrapePosters() { return scrapePostersAndFanart(true); } @Override public Thumb[] scrapeFanart() { return scrapePostersAndFanart(false); } private Thumb[] scrapePostersAndFanart(boolean doCrop) { Element boxArtElement = document.select("p.product-image a").first(); if(boxArtElement != null) { Thumb poster; try { poster = new Thumb(boxArtElement.attr("href"), doCrop); Thumb[] posters = {poster}; return posters; } catch (IOException e) { e.printStackTrace(); } } return new Thumb[0]; } @Override public MPAARating scrapeMPAA() { // It's always XXX content on ActionJav! ;) return MPAARating.RATING_XXX; } @Override public ID scrapeID() { Element idElement = document .select("div.page-title h1") .first(); //just get the ID number off the end of the title, if it exists if(idElement != null && idElement.text().contains("(")) { String idElementText = idElement.text().trim(); idElementText = idElementText.substring(StringUtils.lastIndexOf(idElementText,"(")+1, idElementText.length()-1); return new ID(idElementText); } //maybe some titles don't have ID numbers on squareplus or we got some other error else return ID.BLANK_ID; } @Override public ArrayList<Genre> scrapeGenres() { ArrayList<Genre> genreList = new ArrayList<>(); Element genreElement = document.select("th.label:containsOwn(Genre) ~ td").first(); if(genreElement != null) { String [] actorSplitList = genreElement.text().split(","); for(String genreToAdd : actorSplitList) genreList.add(new Genre(genreToAdd)); } return genreList; } @Override public ArrayList<Actor> scrapeActors() { ArrayList<Actor> actorList = new ArrayList<>(); Element featuringElement = document.select("th.label:containsOwn(Featuring) ~ td:not(:containsOwn(Various))").first(); if(featuringElement != null) { String [] actorSplitList = featuringElement.text().split(","); for(String actorToAdd : actorSplitList) actorList.add(new Actor(actorToAdd,"",null)); } Element starringElement = document.select("th.label:containsOwn(Starring) ~ td:not(:containsOwn(Various))").first(); if(starringElement != null) { String [] actorSplitList = starringElement.text().split(","); for(String actorToAdd : actorSplitList) actorList.add(new Actor(actorToAdd,"",null)); } return actorList; } @Override public ArrayList<Director> scrapeDirectors() { //SquarePlus doesn't have director information, so just return an empty list return new ArrayList<>(); } @Override public Studio scrapeStudio() { Element studioElement = document.select("th.label:containsOwn(Label) ~ td:not(:containsOwn(Other))").first(); if(studioElement != null) return new Studio(studioElement.text()); return Studio.BLANK_STUDIO; } @Override public String createSearchString(File file) { scrapedMovieFile = file; String searchId = findIDTagFromFile(file, isFirstWordOfFileIsID()); return "http://www.squareplus.co.jp/catalogsearch/result/?q=" + searchId; } @Override public SearchResult[] getSearchResults(String searchString) throws IOException { if (searchString == null) return new SearchResult[0]; Document doc = Jsoup.connect(searchString).timeout(CONNECTION_TIMEOUT_VALUE).get(); Elements foundMovies = doc.select("ul.products-grid>li"); String searchId = searchString.replaceAll(".*\\?q=(.*)$", "$1").replace("-", "").toLowerCase(); LinkedList<SearchResult> searchList = new LinkedList<>(); for(Element movie: foundMovies){ String urlPath = movie.select("a").first().attr("href"); String thumb = movie.select("img").first().attr("src"); String label = movie.select(".product-name,.actresslist").text(); SearchResult searchResult = new SearchResult(urlPath, label, new Thumb(thumb)); if (urlPath.endsWith("/" + searchId + ".html")) searchList.addFirst(searchResult); else searchList.addLast(searchResult); } // if both DVD and Blue-Ray gets listed, pick the correct one if (searchList.size() == 2) if (searchList.get(0).getUrlPath().endsWith("/"+searchId+".html")) if (searchList.get(1).getUrlPath().endsWith("/9"+searchId+".html")) searchList.remove(1); return searchList.toArray(new SearchResult[searchList.size()]); } @Override public Thumb[] scrapeExtraFanart() { Elements extraFanartElements = document.select("div.more-views ul li a"); if(extraFanartElements != null) { ArrayList<Thumb> extrafanartThumbList = new ArrayList<>(extraFanartElements.size()); for(Element extraFanartElement : extraFanartElements) { Thumb thumbToAdd; try { thumbToAdd = new Thumb(extraFanartElement.attr("href")); extrafanartThumbList.add(thumbToAdd); } catch (MalformedURLException e) { e.printStackTrace(); } } return extrafanartThumbList.toArray(new Thumb[extrafanartThumbList.size()]); } return new Thumb[0]; } @Override public String toString(){ return "SquarePlus"; } @Override public SiteParsingProfile newInstance() { return new SquarePlusParsingProfile(); } @Override public String getParserName() { return "SquarePlus"; } }