package moviescraper.doctord.controller.amalgamation; import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import javax.swing.SwingWorker; import moviescraper.doctord.controller.siteparsingprofile.SiteParsingProfile; import moviescraper.doctord.controller.siteparsingprofile.SiteParsingProfile.ScraperGroupName; import moviescraper.doctord.model.Movie; import moviescraper.doctord.model.dataitem.DataItemSource; import moviescraper.doctord.view.ScrapeAmalgamatedProgressDialog; public class ScrapeAmalgamatedMovieWorker extends SwingWorker<Void, Map<SiteParsingProfile, Movie>> { List<Thread> scrapeThreads; boolean promptUserForURLWhenScraping = true; //do we stop to ask the user to pick a URL when scraping int progress; int amountOfProgressPerSubtask; SwingWorker<Void, String> worker; boolean scrapeCanceled; List<Map<SiteParsingProfile,Movie>> currentPublishedMovies; int numberOfScrapesToRun = 0; int numberOfScrapesFinished = 0; Map<String, SwingWorker<Void, Void>> runningWorkers; File fileToScrape; AllAmalgamationOrderingPreferences allAmalgamationOrderingPreferences; ScraperGroupAmalgamationPreference scraperGroupAmalgamationPreference; ScrapeAmalgamatedProgressDialog parent; /** * * @param allAmalgamationOrderingPreferences * @param guiMain * @param scraperGroupAmalgamationPreference * @param fileToScrape - file scraped if no gui (if there is a gui we use the state variable from there wich is the file to scrape) */ public ScrapeAmalgamatedMovieWorker(AllAmalgamationOrderingPreferences allAmalgamationOrderingPreferences, ScraperGroupAmalgamationPreference scraperGroupAmalgamationPreference, File fileToScrape, ScrapeAmalgamatedProgressDialog parent) { runningWorkers = new HashMap<>(); progress = 0; amountOfProgressPerSubtask = 0; scrapeCanceled = false; this.scraperGroupAmalgamationPreference = scraperGroupAmalgamationPreference; this.fileToScrape = fileToScrape; this.parent = parent; this.allAmalgamationOrderingPreferences = allAmalgamationOrderingPreferences; } SwingWorker<Void, Void> getWorkerByScraperName(SiteParsingProfile scraper) { SwingWorker<Void, Void> worker = runningWorkers.get(scraper.getDataItemSourceName()); return worker; } public void cancelRunningScraper(SiteParsingProfile scraper) { SwingWorker<Void, Void> scraperToCancel = runningWorkers.get(scraper.getDataItemSourceName()); if(scraperToCancel != null) { System.out.println("Canceling " + scraper + " + thread."); boolean wasThreadCanceled = scraperToCancel.cancel(true); if(wasThreadCanceled) { numberOfScrapesFinished++; } } } public void cancelAllRunningScrapers() { for(SwingWorker<Void, Void> currentWorker : runningWorkers.values()) { if(currentWorker != null) { System.out.println("Canceling " + currentWorker); currentWorker.cancel(true); } } } /*private static void failIfInterrupted() throws InterruptedException { if (Thread.currentThread().isInterrupted()) { throw new InterruptedException("Interrupted while searching files"); } }*/ /** * * @param parsingProfile - item to check if scraping is enabled for this parsing profile * @return true if scraper should scrape for parsingProfile, false otherwise */ private boolean shouldScrapeThread(DataItemSource parsingProfile) { //Default group used for site specific scraping - always want to return true since there's just one thread to scrape if(scraperGroupAmalgamationPreference.getScraperGroupName().equals(ScraperGroupName.DEFAULT_SCRAPER_GROUP)) return true; for (ScraperGroupName currentName : ScraperGroupName.values()) { ScraperGroupAmalgamationPreference currentPref = allAmalgamationOrderingPreferences .getScraperGroupAmalgamationPreference(currentName); LinkedList<DataItemSource> overallPrefs = currentPref .getOverallAmalgamationPreference() .getAmalgamationPreferenceOrder(); for(DataItemSource currentDataItemSource : overallPrefs) { if(currentDataItemSource.getDataItemSourceName().equals(parsingProfile.getDataItemSourceName())) { boolean disabled = currentDataItemSource.isDisabled(); return !disabled; } } } return false; } @Override protected Void doInBackground() { setProgress(0); //failIfInterrupted(); //get the latest version of the sraper group preference - if it's not there for whatever reason (usually from a specific scrape), just leave it alone ScraperGroupAmalgamationPreference scraperGroupAmalgamationPreferenceNew = allAmalgamationOrderingPreferences.getScraperGroupAmalgamationPreference(scraperGroupAmalgamationPreference.getScraperGroupName()); if(scraperGroupAmalgamationPreferenceNew != null) scraperGroupAmalgamationPreference = scraperGroupAmalgamationPreferenceNew; LinkedList<DataItemSource> scraperList = scraperGroupAmalgamationPreference.getOverallAmalgamationPreference().getAmalgamationPreferenceOrder(); //calculate progress amount per worker int numberOfScrapes = 0; for(DataItemSource currentScraper : scraperList) { if(shouldScrapeThread(currentScraper) && currentScraper instanceof SiteParsingProfile) numberOfScrapes++; } int progressAmountPerWorker = 100 / numberOfScrapes; for(DataItemSource currentScraper : scraperList) { //We don't want to read any leftover properties from our JSON - we want to start fresh so things like scraping language do not get set in our scraper currentScraper = currentScraper.createInstanceOfSameType(); if(currentScraper instanceof SiteParsingProfile) { if(shouldScrapeThread(currentScraper)) { scrapeMovieInBackground(fileToScrape, currentScraper, progressAmountPerWorker); numberOfScrapesToRun++; } } } //failIfInterrupted(); //System.out.println("returnMovie is " + returnMovie); //setProgress(100); return null; } private Movie scrapeMovieInBackground(File fileToScrape, DataItemSource scraper, int amountOfProgress) { // failIfInterrupted(); if (scraper instanceof SiteParsingProfile) { final SiteParsingProfile siteScraper = (SiteParsingProfile) scraper; final ScrapeAmalgamatedMovieWorker self = this; final int amtOfProgressFinal = amountOfProgress; final File fileToScrapeFinal = fileToScrape; System.out.println(fileToScrapeFinal); SwingWorker<Void, Void> scraperWorker = new SwingWorker<Void, Void>() { Movie returnMovie; @Override protected Void doInBackground() throws Exception { try { //delegate back to the parent, if we have one, to override the URL we are going to scrape with a custom URL provided by the user. boolean customURLSet = false; if(parent != null) { customURLSet = ScrapeAmalgamatedProgressDialog.showPromptForUserProvidedURL(siteScraper, fileToScrapeFinal); } returnMovie = Movie.scrapeMovie(fileToScrapeFinal, siteScraper, "", customURLSet); return null; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; } @Override protected void done() { self.numberOfScrapesFinished++; //System.out.println("Movie scraped = " + returnMovie); Map<SiteParsingProfile, Movie> resultToPublish = new HashMap<>(); resultToPublish.put(siteScraper, returnMovie); self.publish(resultToPublish); self.progress = amtOfProgressFinal + self.progress; self.setProgress(self.progress); System.out.println("Scraping complete of siteScraper = " + siteScraper); self.runningWorkers.remove(siteScraper); } }; self.runningWorkers.put(scraper.getDataItemSourceName(), scraperWorker); scraperWorker.execute(); } // failIfInterrupted(); return null; } @Override protected void done() { } /** * Enums used to fire properties. * ALL_SCRAPES_FINISHED - used when all scraper workers have finished or been canceled * SCRAPED_MOVIE - One of the scraper threads has finished and is returning back the amalgamated movie it found */ public enum ScrapeAmalgamatedMovieWorkerProperty { ALL_SCRAPES_FINISHED, SCRAPED_MOVIE } @Override protected void process(List<Map<SiteParsingProfile,Movie>> movies) { firePropertyChange(ScrapeAmalgamatedMovieWorkerProperty.SCRAPED_MOVIE.toString(), currentPublishedMovies, movies); currentPublishedMovies = movies; if(numberOfScrapesFinished >= numberOfScrapesToRun) firePropertyChange(ScrapeAmalgamatedMovieWorkerProperty.ALL_SCRAPES_FINISHED.toString(), null, numberOfScrapesFinished); else { System.out.println("Finished " + numberOfScrapesFinished + "/" + numberOfScrapesToRun + " scrape threads."); } } }