package org.Webgatherer.ExperimentalLabs.EmailExtraction; import com.google.inject.Guice; import com.google.inject.Injector; import org.Webgatherer.CoreEngine.Core.ThreadCommunication.ThreadCommunication; import org.Webgatherer.CoreEngine.Core.ThreadCommunication.ThreadCommunicationBase; import org.Webgatherer.CoreEngine.lib.WebDriverFactory; import org.Webgatherer.ExperimentalLabs.DependencyInjection.DependencyBindingModule; import org.Webgatherer.Persistence.InputOutput.PersistenceImpl_WriteToFile; import org.Webgatherer.Persistence.InputOutput.WriterOutputQueueToFile; import org.Webgatherer.Utility.ReadFiles; import org.Webgatherer.WorkflowExample.Workflows.Implementations.WebGatherer.EnumUrlRetrieveOptions; import java.util.List; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; /** * @author Rick Dane */ public class ExtractEmailsFromFileMain { private static final String inputFilePath = "/home/user/Dropbox/Rick/WebGatherer/Output/google/searchResultLinks.txt"; private static final String outputFilePath = "/home/user/Dropbox/Rick/WebGatherer/Output/extractedEmails.txt"; private static ReadFiles readFiles; private static int sizeOfStringArrayEnum = 9; private static WriterOutputQueueToFile writerOutputQueueToFile; public static void main(String[] args) { Injector injector = Guice.createInjector(new DependencyBindingModule()); PageRetrieverThreadManagerEmailExtraction pageRetrieverThreadManager = injector.getInstance(PageRetrieverThreadManagerEmailExtraction.class); ThreadCommunication threadCommunication = injector.getInstance(ThreadCommunication.class); pageRetrieverThreadManager.configure(threadCommunication); readFiles = injector.getInstance(ReadFiles.class); writerOutputQueueToFile = injector.getInstance(WriterOutputQueueToFile.class); threadCommunication.setPageQueue(prepareQueue()); while (!threadCommunication.isPageQueueEmpty()) { try { Thread.sleep(25); } catch (InterruptedException e) { } pageRetrieverThreadManager.run(EnumUrlRetrieveOptions.HTMLPAGE.ordinal()); } try { Thread.sleep(20000); } catch (InterruptedException e) { } writerOutputQueueToFile.writeQueueToFile(outputFilePath, threadCommunication, ThreadCommunicationBase.PageQueueEntries.EMAIL_ADDRESSES.ordinal(), true, ","); } private static Queue<String> prepareQueue() { List<String> urls = readFiles.readLinesToList(inputFilePath); Queue queue = new ConcurrentLinkedQueue<String>(); for (String curUrl : urls) { String[] testEntry = new String[sizeOfStringArrayEnum]; testEntry[ThreadCommunicationBase.PageQueueEntries.BASE_URL.ordinal()] = curUrl; queue.add(testEntry); } return queue; } }