package org.Webgatherer.CoreEngine.Core.Threadable.WebGather;

import com.google.inject.Inject;
import com.google.inject.Provider;
import org.Webgatherer.Common.Properties.PropertiesContainer;
import org.Webgatherer.CoreEngine.Core.ThreadCommunication.ThreadCommunication;
import org.Webgatherer.CoreEngine.Core.ThreadCommunication.ThreadCommunicationBase;

import java.util.*;

/**
 * Decides when an entry taken from the page queue may be handed to a new
 * {@link ThreadRetrievePage} worker.
 *
 * <p>Entries are grouped by the value stored at index
 * {@code PageQueueEntries.KEY.ordinal()} of the entry array. The first entry seen for a key is
 * launched immediately and the key is recorded in {@code inWaiting}; while the key stays there,
 * further entries for it are parked in a per-key queue. {@link #checkToExpireInterval(int)}
 * launches at most one parked entry per key each time the reload interval has elapsed.
 *
 * <p>No synchronization is performed in this class.
 * NOTE(review): presumably it is driven from a single thread - confirm against the callers.
 *
 * @author Rick Dane
 */
public class PageRetrieverThreadManager {

    protected ThreadCommunication threadCommunication;

    /** Parked entries per key; a queue exists only while it holds at least one entry. */
    private final Map<String, Queue<String[]>> waitingUrls = new HashMap<String, Queue<String[]>>();

    /** Keys for which new entries must be parked instead of launched immediately. */
    private final Set<String> inWaiting = new HashSet<String>();

    private WebGather webGather;

    private int maxNullEntries;
    private int cntMaxNullEntries; //TODO properties

    /** Minimum number of milliseconds between two expiry passes. */
    private final int reloadInterval;

    /** Wall-clock time (epoch milliseconds) recorded after the last expiry pass. */
    private long lastIntervalCheckMillis = System.currentTimeMillis();

    protected Provider<ThreadRetrievePage> threadRetrievePageProvider;
    protected ThreadCommunicationPageRetriever threadCommunicationPageRetriever;

    /**
     * Reads the configuration from the "CoreEngine" properties and stores the collaborators.
     *
     * @param threadRetrievePageProvider       supplies a fresh worker for every launch
     * @param propertiesContainer              source of the "CoreEngine" properties
     * @param threadCommunicationPageRetriever consulted before a new worker is created and
     *                                         passed on to every worker
     * @throws NumberFormatException if {@code webGather_maxNullEntries},
     *                               {@code webGather_cntMaxNullEntries} or
     *                               {@code pageRetrieverThreadManager_reloadInterval} is missing
     *                               or is not an integer
     */
    @Inject
    public PageRetrieverThreadManager(Provider<ThreadRetrievePage> threadRetrievePageProvider,
                                      PropertiesContainer propertiesContainer,
                                      ThreadCommunicationPageRetriever threadCommunicationPageRetriever) {
        Properties properties = propertiesContainer.getProperties("CoreEngine");
        maxNullEntries = Integer.parseInt(properties.getProperty("webGather_maxNullEntries"));
        cntMaxNullEntries = Integer.parseInt(properties.getProperty("webGather_cntMaxNullEntries"));

        this.threadCommunicationPageRetriever = threadCommunicationPageRetriever;
        this.threadRetrievePageProvider = threadRetrievePageProvider;

        reloadInterval = Integer.parseInt(properties.getProperty("pageRetrieverThreadManager_reloadInterval"));
    }

    /**
     * Runs an expiry pass if more than {@code reloadInterval} milliseconds have elapsed since
     * the previous one, then restarts the interval.
     *
     * @param retrieveType passed through unchanged to every worker launched by the pass
     */
    public void checkToExpireInterval(int retrieveType) {
        long elapsedMillis = System.currentTimeMillis() - lastIntervalCheckMillis;
        if (elapsedMillis > reloadInterval) {
            expireInterval(retrieveType);
            // Taken after the pass so the time spent launching is not counted against the next interval.
            lastIntervalCheckMillis = System.currentTimeMillis();
        }
    }

    /**
     * Launches the oldest parked entry of every key that has one. When that was the key's last
     * parked entry, the key is released from {@code inWaiting} and its empty queue is dropped.
     *
     * <p>NOTE(review): this pass does not consult
     * {@code threadCommunicationPageRetriever.allowedToCreateNewThread()} - confirm that is intended.
     * <br>NOTE(review): a key whose queue is already empty is never released from
     * {@code inWaiting} here; it is only released after a later parked entry has been launched.
     * Confirm that is the intended throttling behaviour.
     *
     * @param retrieveType passed through unchanged to every launched worker
     */
    private void expireInterval(int retrieveType) {
        Iterator<Map.Entry<String, Queue<String[]>>> it = waitingUrls.entrySet().iterator();
        while (it.hasNext()) {
            Queue<String[]> pending = it.next().getValue();
            if (pending.isEmpty()) {
                // Nothing parked for this key; drop the queue, it is re-created on demand.
                it.remove();
                continue;
            }

            String[] nextEntry = pending.remove();
            if (nextEntry == null) {
                continue;
            }

            launchThread(nextEntry, retrieveType);
            if (pending.isEmpty()) {
                inWaiting.remove(keyOf(nextEntry));
                it.remove();
            }
        }
    }

    /**
     * Stores both collaborators.
     *
     * @param webGather           the owning gatherer
     * @param threadCommunication the communication object the page queue is read from and that
     *                            is passed to every worker
     */
    public void configure(WebGather webGather, ThreadCommunication threadCommunication) {
        this.webGather = webGather;
        this.threadCommunication = threadCommunication;
    }

    /**
     * Stores the communication object the page queue is read from.
     *
     * @param threadCommunication passed to every worker launched afterwards
     */
    public void configure(ThreadCommunication threadCommunication) {
        this.threadCommunication = threadCommunication;
    }

    /**
     * Tries to take one entry from the page queue and launch a worker for it.
     *
     * @param retrieveType passed through unchanged to the launched worker
     * @return {@code true} if a worker was launched; {@code false} if no new thread is allowed,
     *         the page queue returned {@code null}, or the entry was parked because its key is
     *         currently in waiting
     */
    public boolean run(int retrieveType) {
        if (!threadCommunicationPageRetriever.allowedToCreateNewThread()) {
            return false;
        }

        String[] entry = threadCommunication.getFromPageQueue();
        if (entry == null) {
            return false;
        }

        if (!determineIfCanStartThreadImmediately(entry)) {
            return false;
        }

        launchThread(entry, retrieveType);
        return true;
    }

    /**
     * Obtains a new worker from the provider, configures it and starts it.
     *
     * @param entry        the page-queue entry the worker should process
     * @param retrieveType passed through unchanged to the worker
     */
    protected void launchThread(String[] entry, int retrieveType) {
        ThreadRetrievePage worker = threadRetrievePageProvider.get();
        worker.configure(entry, threadCommunication, retrieveType, threadCommunicationPageRetriever);
        worker.start();
    }

    /**
     * Decides whether {@code entry} may be launched right away. If its key is not yet in
     * waiting, the key is marked as waiting and the entry may start. Otherwise the entry is
     * appended to the key's queue for a later expiry pass.
     *
     * @param entry the page-queue entry; its key is read from index
     *              {@code PageQueueEntries.KEY.ordinal()}
     * @return {@code true} if the caller should launch the entry now, {@code false} if it was parked
     */
    protected boolean determineIfCanStartThreadImmediately(String[] entry) {
        String key = keyOf(entry);

        // Set.add reports whether the key was newly inserted, i.e. was not in waiting before.
        if (inWaiting.add(key)) {
            return true;
        }

        Queue<String[]> pending = waitingUrls.get(key);
        if (pending == null) {
            pending = new LinkedList<String[]>();
            waitingUrls.put(key, pending);
        }
        pending.add(entry);
        return false;
    }

    /** Returns the grouping key stored in a page-queue entry. */
    private static String keyOf(String[] entry) {
        return entry[ThreadCommunicationBase.PageQueueEntries.KEY.ordinal()];
    }
}