package org.Webgatherer.Controller.Api;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.rickdane.springmodularizedproject.api.transport.*;
import org.Webgatherer.Api.Scraper.ScraperFactory;
import org.Webgatherer.Common.Properties.PropertiesContainer;
import org.Webgatherer.Controller.EntityTransport.EntryTransport;
import org.Webgatherer.CoreEngine.Core.ThreadCommunication.ThreadCommunication;
import org.Webgatherer.CoreEngine.Core.ThreadCommunication.ThreadCommunicationBase;
import org.Webgatherer.ExperimentalLabs.DependencyInjection.DependencyBindingModule;
import org.Webgatherer.ExperimentalLabs.EmailExtraction.PageRetrieverThreadManagerEmailExtraction;
import org.Webgatherer.ExperimentalLabs.Mail.EmailSendReceive;
import org.Webgatherer.ExperimentalLabs.Scraper.Core.ScraperBase;
import org.Webgatherer.Utility.RandomSelector;
import org.Webgatherer.WorkflowExample.Workflows.Implementations.WebGatherer.EnumUrlRetrieveOptions;
import java.util.*;
import java.util.concurrent.ConcurrentLinkedQueue;
/**
 * Entry point of the application. {@link #main(String[])} polls the remote API at a fixed interval
 * and runs whatever work it finds: URL scrape jobs, email-extraction jobs and outgoing emails.
 *
 * <p>All state is static and the polling loop runs on the thread that calls {@code main}.
 * {@code apiPost} / {@code apiPut} are not defined in this file (presumably inherited from
 * {@code BaseApiCommunication}).
 *
 * @author Rick Dane
 */
public class ApiCommunication extends BaseApiCommunication {

    // Base URL of the remote API. It ends with '/', so every endpoint below is appended WITHOUT a
    // leading '/' (a leading one would yield "...:8080//scrapers").
    private static final String baseApiUrl = "http://ec2-107-21-182-174.compute-1.amazonaws.com:8080/";
    //private static final String baseApiUrl = "http://localhost:8080/springmodularizedproject1/";
    private static final String serviceEndpointGetScraper = baseApiUrl + "webgathererjobs/getPendingJobToLaunch";
    private static final String servicePersistRawscrapeddata = baseApiUrl + "rawscrapeddatas";
    private static final String serviceUrlsAwaitingEmailScrape = baseApiUrl + "rawscrapeddatas/retrieveUrlsAwaitingEmailScrape";
    private static final String scraperEndPoint = baseApiUrl + "scrapers";
    private static final String postEmailListEndPoint = baseApiUrl + "receivedemails/uploadNewRetrievedEmails";
    private static final String emailToSendEndPoint = baseApiUrl + "emailaddresses/getEmailToSend";

    // Pause between two polling rounds, in seconds.
    private static final int callIntervalSeconds = 10;
    // Arguments handed to ScraperBase.run(keyword, pageNum, maxPages).
    private static final int pageNum = 1;
    private static final int maxPages = 2;
    // Number of "awaiting email scrape" requests issued per polling round.
    private static final int maxUrlEmailScrapeUrls = 20;
    // Length of the String[] entries put on the page queue; slots are addressed by
    // ThreadCommunicationBase.PageQueueEntries ordinals.
    private static final int sizeOfStringArrayEnum = 9;
    // Bounds passed to RandomSelector.generateRandomNumberInRange; the result is added to a
    // millisecond timestamp, so these are milliseconds.
    private static final int minDelay = 250000;
    private static final int maxDelay = 800000;

    // Cleared when the polling thread is interrupted so that main() leaves its loop.
    private static boolean isRunning = true;

    // NOTE: the declaration order of the following fields is their static initialisation order;
    // emailProperties needs propertiesContainer, the mail clients need injector.
    private static PropertiesContainer propertiesContainer = new PropertiesContainer();
    private static Injector injector = Guice.createInjector(new DependencyBindingModule());
    private static EmailSendReceive emailSendReceive = injector.getInstance(EmailSendReceive.class);
    private static EmailSendReceive emailImap = injector.getInstance(EmailSendReceive.class);
    private static RandomSelector randomSelector;
    // Earliest time at which the next email may be sent; null until the first email has gone out.
    private static Date nextEmailSendTime = null;
    private static Properties emailProperties = propertiesContainer.getProperties("emailAccounts");

    /**
     * Configures the IMAP client and then polls the API until {@code isRunning} is cleared.
     * Each round: fetch and run one pending scrape job (if any), run the email-scrape batch,
     * send one email if the send delay has elapsed, then sleep.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        emailImap.configureImap(emailProperties.getProperty("email1_imap"), emailProperties.getProperty("email1_imap_username"), emailProperties.getProperty("email1_password"));
        while (isRunning) {
            EntryTransport entryTransport = new EntryTransport();
            Scraper curScraper = apiPost(entryTransport, serviceEndpointGetScraper, Scraper.class);
            if (curScraper != null) {
                runUrlScrapeJob(curScraper);
            }
            runEmailScrapeJob();
            Date curTime = new Date();
            if (nextEmailSendTime == null || curTime.getTime() > nextEmailSendTime.getTime()) {
                getEmailAndSend();
            }
            // Retrieval of incoming mail is currently switched off.
            //runEmailRetrieve();
            sleep();
        }
    }

    /**
     * Reads the unread emails from the IMAP client and posts them to the API.
     * Currently not called (see the commented-out call in {@link #main(String[])}).
     */
    private static void runEmailRetrieve() {
        List<ReceivedEmail> receivedEmailList = emailImap.retrieveUnreadEmails();
        apiPost(receivedEmailList, postEmailListEndPoint);
    }

    /**
     * Asks the API {@code maxUrlEmailScrapeUrls} times for a record awaiting email scraping,
     * collects the non-null answers keyed by URL and, if any were returned, runs the
     * email-extraction job on them. Records sharing a URL overwrite each other in the map.
     */
    private static void runEmailScrapeJob() {
        Map<String, Rawscrapeddata> rawscrapeddataList = new HashMap<String, Rawscrapeddata>();
        for (int i = 1; i <= maxUrlEmailScrapeUrls; i++) {
            //dummy object
            TransportBase transportBase = new TransportBase();
            Rawscrapeddata rawscrapeddata = apiPost(transportBase, serviceUrlsAwaitingEmailScrape, Rawscrapeddata.class);
            if (rawscrapeddata != null) {
                rawscrapeddataList.put(rawscrapeddata.getUrl(), rawscrapeddata);
            }
        }
        if (!rawscrapeddataList.isEmpty()) {
            runEmailExtractionJob(rawscrapeddataList);
        }
    }

    /**
     * Runs one URL scrape job: scrapes the configured site, posts every result as a new
     * {@code Rawscrapeddata} record and marks the scraper as processed via the API.
     *
     * @param curScraper job description received from the API
     * @return {@code false} if the scraper type is not supported (only CRAIGSLIST is),
     *         {@code true} once the job has been run and reported
     */
    private static boolean runUrlScrapeJob(Scraper curScraper) {
        if (curScraper.getType() != Scraper.Type.CRAIGSLIST) {
            return false;
        }
        // CRAIGSLIST jobs are handled by the "generic" scraper implementation.
        String scraperType = "generic";
        ScraperBase scraper = ScraperFactory.createScraper(scraperType);
        scraper.configure(curScraper.getUrlPrefix(), curScraper.getUrlPostfix(), curScraper.getBaseDomainName(), curScraper.getPageIncrementAmnt());
        List<String[]> urlEntries = scraper.run(curScraper.getKeyword(), pageNum, maxPages);
        for (String[] curEntry : urlEntries) {
            Rawscrapeddata rawscrapeddata = new Rawscrapeddata();
            // Index 1 of a scraper result is stored as the record's URL.
            rawscrapeddata.setUrl(curEntry[1]);
            rawscrapeddata.setFkScraperId(curScraper.getId());
            rawscrapeddata.setRawscrapeddataEmailScrapeAttempted(Rawscrapeddata.RawscrapeddataEmailScrapeAttempted.NOT_ATTEMPTED);
            apiPost(rawscrapeddata, servicePersistRawscrapeddata, Rawscrapeddata.class);
        }
        curScraper.setStatus(Scraper.ProcessStatus.PROCESSED);
        apiPut(curScraper, scraperEndPoint);
        return true;
    }

    /**
     * Waits {@code callIntervalSeconds} between two polling rounds. If the thread is interrupted
     * while waiting, the interrupt is treated as a request to stop and the polling loop is ended.
     */
    private static void sleep() {
        if (!pause(callIntervalSeconds * 1000L)) {
            isRunning = false;
        }
    }

    /**
     * Sleeps for the given time without losing an interrupt.
     *
     * @param millis time to sleep in milliseconds
     * @return {@code true} if the full time elapsed, {@code false} if the thread was interrupted
     *         (the interrupt flag is set again before returning)
     */
    private static boolean pause(long millis) {
        try {
            Thread.sleep(millis);
            return true;
        } catch (InterruptedException e) {
            // Restore the flag so callers further up can see the interruption.
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Builds the page queue for an email-extraction job: one {@code String[]} entry per record,
     * with only the BASE_URL slot filled.
     *
     * <p>NOTE(review): the elements are {@code String[]} although the declared element type is
     * {@code String}. The signature is kept as-is because the parameter type of
     * {@code ThreadCommunication.setPageQueue} is not visible here; confirm and tighten to
     * {@code Queue<String[]>} if possible.
     *
     * @param rawscrapeddataList records to enqueue, keyed by URL
     * @return a new concurrent queue holding one entry per record
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private static Queue<String> prepareQueueForEmails(Map<String, Rawscrapeddata> rawscrapeddataList) {
        Queue queue = new ConcurrentLinkedQueue<String>();
        for (Rawscrapeddata curRawscrapeddata : rawscrapeddataList.values()) {
            String[] entry = new String[sizeOfStringArrayEnum];
            entry[ThreadCommunicationBase.PageQueueEntries.BASE_URL.ordinal()] = curRawscrapeddata.getUrl();
            queue.add(entry);
        }
        return queue;
    }

    /**
     * Runs the email extraction over the given records and reports the results to the API.
     * The page queue is filled with the records' URLs, the page retriever is run until the queue
     * is empty, and after a fixed wait every entry of the output holder is written back onto the
     * matching record (email address + ATTEMPTED flag) and sent with {@code apiPut}.
     *
     * @param rawscrapeddataList records awaiting email scraping, keyed by URL
     */
    private static void runEmailExtractionJob(Map<String, Rawscrapeddata> rawscrapeddataList) {
        // A fresh injector is created for every job (it does not reuse the class-level one).
        Injector jobInjector = Guice.createInjector(new DependencyBindingModule());
        PageRetrieverThreadManagerEmailExtraction pageRetrieverThreadManager = jobInjector.getInstance(PageRetrieverThreadManagerEmailExtraction.class);
        ThreadCommunication threadCommunication = jobInjector.getInstance(ThreadCommunication.class);
        pageRetrieverThreadManager.configure(threadCommunication);
        threadCommunication.setPageQueue(prepareQueueForEmails(rawscrapeddataList));

        while (!threadCommunication.isPageQueueEmpty()) {
            if (!pause(25)) {
                // Interrupted: stop dispatching instead of spinning through the loop without a delay.
                break;
            }
            pageRetrieverThreadManager.run(EnumUrlRetrieveOptions.HTMLPAGE.ordinal());
        }

        // Fixed wait before the results are read - presumably to let the retriever threads
        // finish; TODO confirm. Returns immediately if the thread has been interrupted.
        pause(15000);

        while (!threadCommunication.isOutputDataHolderEmpty()) {
            //TODO: Note that this will overwrite where there is more than 1 rawscrapeddata entry per url, consider re-working this at some point to account for this
            String[] curEntry = threadCommunication.getFromOutputDataHolder();
            String urlKey = curEntry[ThreadCommunicationBase.PageQueueEntries.BASE_URL.ordinal()];
            String email = curEntry[ThreadCommunicationBase.PageQueueEntries.EMAIL_ADDRESSES.ordinal()];
            Rawscrapeddata curRawscrapeddata = rawscrapeddataList.get(urlKey);
            if (curRawscrapeddata == null) {
                // The result carries a URL that is not part of this batch; skip it rather than
                // abort the whole job (and the polling loop) with a NullPointerException.
                System.err.println("No pending record found for scraped URL: " + urlKey);
                continue;
            }
            curRawscrapeddata.setEmailAddress(email);
            curRawscrapeddata.setRawscrapeddataEmailScrapeAttempted(Rawscrapeddata.RawscrapeddataEmailScrapeAttempted.ATTEMPTED);
            apiPut(curRawscrapeddata, servicePersistRawscrapeddata);
        }
    }

    /**
     * Fetches the next email to send from the API and sends it. Nothing is sent when the API
     * returns no email, or when the email has no body or no recipient.
     */
    private static void getEmailAndSend() {
        //dummy object
        TransportBase transportBase = new TransportBase();
        EmailTransport emailTransport = apiPost(transportBase, emailToSendEndPoint, EmailTransport.class);
        if (emailTransport == null) {
            return;
        }
        String body = emailTransport.getBody();
        if (body == null) {
            return;
        }
        // NOTE(review): this replaces the literal text "//n" with "/n". It may have been meant to
        // turn an escaped "\\n" into a line break - confirm against what the API actually sends.
        body = body.replace("//n", "/n");
        emailTransport.setBody(body);
        if (emailTransport.getToEmail() != null) {
            sendEmail(emailTransport);
        }
    }

    /**
     * Sends the given email through the SMTP account configured in the "emailAccounts"
     * properties and schedules the next permitted send time a random delay
     * (between {@code minDelay} and {@code maxDelay}) into the future.
     *
     * @param emailTransport email to send; body, subject and recipient are read from it
     */
    private static void sendEmail(EmailTransport emailTransport) {
        emailSendReceive.configure(emailProperties.getProperty("email_fromName"), emailProperties.getProperty("email1_smtp"), emailProperties.getProperty("email1_address"), emailProperties.getProperty("email1_password"), emailProperties.getProperty("email1_smtp_port"));
        String attachmentFilePath = emailProperties.getProperty("email_attachment1");
        String body = emailTransport.getBody();
        String subject = emailTransport.getSubject();
        emailSendReceive.sendEmail(body, subject, emailTransport.getToEmail(), attachmentFilePath); //curEmail

        randomSelector = injector.getInstance(RandomSelector.class);
        int delay = randomSelector.generateRandomNumberInRange(minDelay, maxDelay);
        Date now = new Date();
        nextEmailSendTime = new Date(now.getTime() + delay);
    }
}