package org.Webgatherer.Controller; import org.Webgatherer.Controller.Component.ControllerFlow; import org.Webgatherer.CoreEngine.DependencyInjection.DependencyBindingModule; import org.Webgatherer.CoreEngine.Core.ThreadCommunication.FinalOutputContainer; import com.google.inject.Guice; import com.google.inject.Injector; import org.Webgatherer.Persistence.InputOutput.PersistenceImpl_WriteToFile; import org.Webgatherer.Utility.ReadFiles; import org.Webgatherer.Utility.TextCleaner; import org.Webgatherer.WorkflowExample.DataHolders.ContainerBase; import java.util.*; import java.util.concurrent.ConcurrentLinkedQueue; /** * @author Rick Dane */ public class Entry_ExampleRun_WebPagesScrape { private static final String FILE_OUTPUT = "/home/user/Dropbox/Rick/WebGatherer/Output/webScrape.html"; private static final String INPUT_URLS = "/home/user/Dropbox/Rick/WebGatherer/Input/inputUrls"; private static final String WORKFLOW_WEBGATHER = "org.Webgatherer.WorkflowExample.Workflows.Implementations.WebGatherer.Workflow_WebGather_1"; private static final String WORKFLOW_DATAINTERPRETOR = "org.Webgatherer.WorkflowExample.Workflows.Implementations.DataInterpetor.Workflow_DataInterpretor_1"; public static void main(String[] args) { Injector injector = Guice.createInjector(new DependencyBindingModule()); ControllerFlow wfContrl = injector.getInstance(ControllerFlow.class); FinalOutputContainer finalOutputContainer = launchWebGathererThread(injector, wfContrl, WORKFLOW_WEBGATHER, WORKFLOW_DATAINTERPRETOR); testPrintResults(finalOutputContainer); } private static FinalOutputContainer launchWebGathererThread(Injector injector, ControllerFlow wfContrl, String workflow2, String workflow3) { List<String> workflowlist = new ArrayList<String>(); workflowlist.add(workflow2); workflowlist.add(workflow3); Map<String, Object> parameterMap = new HashMap<String, Object>(); parameterMap.put("pageQueue", PreparePageQueue()); FinalOutputContainer finalOutputContainer = injector.getInstance(FinalOutputContainer.class); wfContrl.configure(finalOutputContainer, workflowlist, parameterMap); wfContrl.start(); return finalOutputContainer; } private static void testPrintResults(FinalOutputContainer finalOutputContainer) { int THREAD_SLEEP = 400; int LIST_FIRST_ITEM = 0; int killCount = 50; int countKilledSoFar = 0; while (true) { Map<String, ContainerBase> outputMap = null; try { outputMap = finalOutputContainer.removeFromFinalOutputContainer(); } catch (Exception e) { try { Thread.sleep(THREAD_SLEEP); } catch (InterruptedException e1) { e1.printStackTrace(); } continue; } if (outputMap == null || outputMap.isEmpty()) { try { Thread.sleep(THREAD_SLEEP); } catch (InterruptedException e) { e.printStackTrace(); } continue; } ContainerBase outputContainer = null; for (Map.Entry<String, ContainerBase> entries : outputMap.entrySet()) { String key = entries.getKey(); outputContainer = outputMap.get(key); LinkedList<String> list = outputContainer.getEntries(); PersistenceImpl_WriteToFile.appendToFile(FILE_OUTPUT, "<br/> <br/> " + key + ": <br/> <br/>"); for (String curStr : list) { PersistenceImpl_WriteToFile.appendToFile(FILE_OUTPUT, " <a href='" + curStr + "'>" + curStr + "</a> ,"); } countKilledSoFar++; break; } if (countKilledSoFar == killCount) { break; } } } private static Queue PreparePageQueue() { Queue<String[]> pageQueue = new ConcurrentLinkedQueue<String[]>(); TextCleaner textCleaner = new TextCleaner(); ReadFiles readFiles = new ReadFiles(); List<String> rawUrls = readFiles.readLinesToList(INPUT_URLS); for (String curUrl : rawUrls) { String[] site1 = {textCleaner.removeUrlPrefix(curUrl), curUrl, null, null}; pageQueue.add(site1); } return pageQueue; } }