/*******************************************************************************
* Copyright (c) 2011 Subgraph.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Subgraph - initial API and implementation
******************************************************************************/
package com.subgraph.vega.impl.scanner;
import java.net.URI;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.http.client.methods.HttpUriRequest;
import com.subgraph.vega.api.analysis.IContentAnalyzer;
import com.subgraph.vega.api.crawler.ICrawlerProgressTracker;
import com.subgraph.vega.api.crawler.IWebCrawler;
import com.subgraph.vega.api.http.requests.IHttpRequestEngine;
import com.subgraph.vega.api.model.IWorkspace;
import com.subgraph.vega.api.model.alerts.IScanInstance;
import com.subgraph.vega.api.scanner.IScannerConfig;
import com.subgraph.vega.api.scanner.modules.IBasicModuleScript;
import com.subgraph.vega.api.scanner.modules.IResponseProcessingModule;
import com.subgraph.vega.api.scanner.modules.IScannerModule;
import com.subgraph.vega.api.scanner.modules.IScannerModuleRunningTime;
import com.subgraph.vega.impl.scanner.urls.UriFilter;
import com.subgraph.vega.impl.scanner.urls.UriParser;
/**
 * Runnable that drives a single scan: it installs the response processing
 * modules on the content analyzer, runs the crawler phase, records the final
 * scan status on the {@link IScanInstance} and finally unlocks the scanner
 * and the workspace.
 *
 * The task also registers itself as the crawler's progress tracker and
 * forwards progress and exception notifications to the scan instance.
 *
 * {@link #stop()} is expected to be called from a thread other than the one
 * executing {@link #run()}; the fields shared between the two are volatile.
 */
public class ScannerTask implements Runnable, ICrawlerProgressTracker {

	private final Logger logger = Logger.getLogger("scanner");
	private final IScanInstance scanInstance;
	private final Scanner scanner;
	private final IScannerConfig scannerConfig;
	private final IWorkspace workspace;
	private final IContentAnalyzer contentAnalyzer;
	private final IHttpRequestEngine requestEngine;
	private final List<IResponseProcessingModule> responseProcessingModules;
	private final List<IBasicModuleScript> basicModules;
	private volatile boolean stopRequested;
	// Written by the scan thread and read by stop(); volatile so that stop()
	// observes the crawler as soon as the crawl phase publishes it.
	private volatile IWebCrawler currentCrawler;

	/**
	 * Creates a task for one scan instance.
	 *
	 * @param scanInstance    scan instance that receives status, progress and exception updates
	 * @param scanner         owning scanner; supplies the crawler factory and is unlocked when the task ends
	 * @param config          scan configuration; supplies the base URI and is passed to the URI parser and filter
	 * @param requestEngine   request engine handed to the crawler factory
	 * @param workspace       workspace whose active scan instance is cleared and which is unlocked when the task ends
	 * @param contentAnalyzer analyzer that is given the response processing modules
	 * @param responseModules response processing modules to install on the analyzer
	 * @param basicModules    basic module scripts passed to the URI parser
	 */
	ScannerTask(IScanInstance scanInstance, Scanner scanner, IScannerConfig config,
			IHttpRequestEngine requestEngine, IWorkspace workspace,
			IContentAnalyzer contentAnalyzer, List<IResponseProcessingModule> responseModules, List<IBasicModuleScript> basicModules) {
		this.scanInstance = scanInstance;
		this.scanner = scanner;
		this.scannerConfig = config;
		this.workspace = workspace;
		this.requestEngine = requestEngine;
		this.contentAnalyzer = contentAnalyzer;
		this.responseProcessingModules = responseModules;
		this.basicModules = basicModules;
		this.logger.setLevel(Level.ALL);
	}

	/**
	 * Requests cancellation of the scan and, if a crawler is currently
	 * published by the crawl phase, asks it to stop.
	 */
	void stop() {
		stopRequested = true;
		// Read the field exactly once: the scan thread may clear it between a
		// null check and the call, which would otherwise cause a NullPointerException.
		final IWebCrawler crawler = currentCrawler;
		if (crawler == null) {
			return;
		}
		try {
			crawler.stop();
		} catch (InterruptedException e) {
			logger.log(Level.WARNING, "Interrupted while stopping crawler", e);
			// Preserve the interrupt status for the caller instead of swallowing it.
			Thread.currentThread().interrupt();
		}
	}

	/**
	 * Executes the scan. The scan status is set to auditing, the crawler phase
	 * is run, and the status is then set to cancelled or completed depending on
	 * whether {@link #stop()} was called. The active scan instance is cleared
	 * and the scanner and workspace are unlocked even if the crawl phase throws.
	 */
	@Override
	public void run() {
		try {
			contentAnalyzer.setResponseProcessingModules(responseProcessingModules);
			scanInstance.updateScanStatus(IScanInstance.SCAN_AUDITING);
			runCrawlerPhase();
			if (stopRequested) {
				scanInstance.updateScanStatus(IScanInstance.SCAN_CANCELLED);
				logger.info("Scanner cancelled.");
			} else {
				scanInstance.updateScanStatus(IScanInstance.SCAN_COMPLETED);
				logger.info("Scanner completed");
			}
		} finally {
			releaseScanResources();
		}
		printModuleRuntimeStats();
	}

	/**
	 * Clears the active scan instance and unlocks the scanner and workspace.
	 * The steps are nested in finally blocks so a failure in one does not
	 * prevent the remaining ones from running.
	 */
	private void releaseScanResources() {
		try {
			workspace.getScanAlertRepository().setActiveScanInstance(null);
		} finally {
			try {
				scanner.unlock();
			} finally {
				workspace.unlock();
			}
		}
	}

	/**
	 * Logs the running time profile of every module that was invoked at least once.
	 */
	private void printModuleRuntimeStats() {
		logger.info("Scanning module runtime statistics:");
		logInvokedModuleProfiles(responseProcessingModules);
		logInvokedModuleProfiles(basicModules);
	}

	/**
	 * Logs the running time profile of each module in the list whose
	 * invocation count is greater than zero.
	 *
	 * @param modules modules to report on
	 */
	private void logInvokedModuleProfiles(List<? extends IScannerModule> modules) {
		for (IScannerModule m : modules) {
			IScannerModuleRunningTime profile = m.getRunningTimeProfile();
			if (profile.getInvocationCount() > 0) {
				logger.info(profile.toString());
			}
		}
	}

	/**
	 * Creates a crawler, seeds it by processing the configured base URI,
	 * starts it and blocks until it reports that it has finished.
	 */
	private void runCrawlerPhase() {
		logger.info("Starting crawling phase");
		final IWebCrawler crawler = scanner.getCrawlerFactory().create(requestEngine);
		currentCrawler = crawler;
		try {
			crawler.registerProgressTracker(this);
			UriParser uriParser = new UriParser(scannerConfig, basicModules, workspace, crawler, new UriFilter(scannerConfig), contentAnalyzer, scanInstance);
			URI baseURI = scannerConfig.getBaseURI();
			uriParser.processUri(baseURI);
			crawler.start();
			try {
				crawler.waitFinished();
			} catch (InterruptedException e) {
				logger.log(Level.WARNING, "Interrupted while waiting for crawler to finish", e);
				// Preserve the interrupt status instead of swallowing it.
				Thread.currentThread().interrupt();
			}
		} finally {
			// Always drop the reference so stop() never acts on a crawler
			// that belongs to a phase which has already ended.
			currentCrawler = null;
		}
		logger.info("Crawler finished");
	}

	/**
	 * Forwards crawler progress to the scan instance.
	 *
	 * @param completed value reported by the crawler as completed
	 * @param total     value reported by the crawler as the total
	 */
	@Override
	public void progressUpdate(int completed, int total) {
		scanInstance.updateScanProgress(completed, total);
	}

	/**
	 * Forwards an exception reported by the crawler to the scan instance.
	 *
	 * @param request   request associated with the exception
	 * @param exception the exception that was thrown
	 */
	@Override
	public void exceptionThrown(HttpUriRequest request, Throwable exception) {
		scanInstance.notifyScanException(request, exception);
	}
}