package org.Webgatherer.ExperimentalLabs.EmailExtraction;

import com.google.inject.Inject;
import org.Webgatherer.CoreEngine.Core.ThreadCommunication.ThreadCommunicationBase;
import org.Webgatherer.CoreEngine.Core.Threadable.WebGather.ThreadRetrievePage;
import org.Webgatherer.CoreEngine.lib.WebDriverFactory;
import org.Webgatherer.ExperimentalLabs.HtmlProcessing.HtmlParser;
import org.Webgatherer.WorkflowExample.Workflows.Base.DataInterpetor.EmailExtractor;
import org.Webgatherer.WorkflowExample.Workflows.Base.DataInterpetor.TextExtraction;
import org.openqa.selenium.By;
import org.openqa.selenium.WebElement;

import java.util.LinkedList;
import java.util.Set;

/**
 * Page-retrieval thread that, after loading a page, extracts the e-mail addresses found in
 * the page source and records them, together with the page's final URL and title, in the
 * current queue entry before handing the entry to the output data holder.
 *
 * @author Rick Dane
 */
public class ThreadRetrievePageEmailExtraction extends ThreadRetrievePage {

    /** Terminates each key/value pair written to the CUSTOM_RET_VALUE slot. */
    private static final String PAIR_DELIMITER = "#";

    /** Separates a key from its value inside a pair written to the CUSTOM_RET_VALUE slot. */
    private static final String KEY_VALUE_DELIMITER = "~";

    /** Extractor used to pull e-mail addresses out of the retrieved page source. */
    protected EmailExtractor emailExtractor;

    /**
     * Creates the thread object; the first three collaborators are passed straight to the
     * superclass constructor.
     *
     * @param webDriverFactory forwarded to {@link ThreadRetrievePage}
     * @param textExtraction   forwarded to {@link ThreadRetrievePage}
     * @param htmlParser       forwarded to {@link ThreadRetrievePage}
     * @param emailExtractor   extractor applied to the page source in {@link #getPage()}
     */
    @Inject
    public ThreadRetrievePageEmailExtraction(WebDriverFactory webDriverFactory,
                                             TextExtraction textExtraction,
                                             HtmlParser htmlParser,
                                             EmailExtractor emailExtractor) {
        super(webDriverFactory, textExtraction, htmlParser);
        this.emailExtractor = emailExtractor;
    }

    /**
     * Loads the URL stored in the entry's BASE_URL slot and fills two slots of the entry:
     * <ul>
     *   <li>EMAIL_ADDRESSES: every extracted address followed by a comma (so a non-empty
     *       list ends with a trailing comma; an empty list yields an empty string);</li>
     *   <li>CUSTOM_RET_VALUE: {@code url~<current url>#title~<page title>#}, built from
     *       the delimiters exposed by {@link #getDelimeter()} and
     *       {@link #getDelimeter2nd()}.</li>
     * </ul>
     * The entry is then passed to {@code threadCommunication.addToOutputDataHolder}.
     */
    @Override
    protected void getPage() {
        System.out.println("thread attempting to get page");
        driver.get(entry[ThreadCommunicationBase.PageQueueEntries.BASE_URL.ordinal()]);

        String page = driver.getPageSource();
        // Read after navigation, so this is the URL the driver reports once the page loaded.
        String curUrl = driver.getCurrentUrl();

        LinkedList<String> emailAddresses = emailExtractor.extractEmailAddressesList(page);
        StringBuilder emails = new StringBuilder();
        for (String email : emailAddresses) {
            // The trailing comma after the last address is kept on purpose: it is part of
            // the existing output format.
            emails.append(email).append(',');
        }
        entry[ThreadCommunicationBase.PageQueueEntries.EMAIL_ADDRESSES.ordinal()] = emails.toString();

        StringBuilder customValue = new StringBuilder();
        customValue.append("url").append(KEY_VALUE_DELIMITER).append(curUrl).append(PAIR_DELIMITER);
        customValue.append("title").append(KEY_VALUE_DELIMITER).append(driver.getTitle()).append(PAIR_DELIMITER);
        entry[ThreadCommunicationBase.PageQueueEntries.CUSTOM_RET_VALUE.ordinal()] = customValue.toString();

        threadCommunication.addToOutputDataHolder(entry);
    }

    /**
     * @return the delimiter that terminates each key/value pair in the custom return value
     *         ({@code "#"})
     */
    public static String getDelimeter() {
        return PAIR_DELIMITER;
    }

    /**
     * @return the delimiter placed between a key and its value in the custom return value
     *         ({@code "~"})
     */
    public static String getDelimeter2nd() {
        return KEY_VALUE_DELIMITER;
    }

    /**
     * Always returns {@code true}.
     * NOTE(review): how the superclass uses this result is not visible from this file.
     *
     * @return {@code true}, unconditionally
     */
    @Override
    protected boolean actionIfUrlValid() {
        return true;
    }
}