Spider.java example

Explorer

zaproxy-master
- src
  - ch
    - csnc
      - extension
        httpclient
        AliasCertificate.java
        AliasKeyManager.java
        PKCS11Configuration.java
        SSLContextManager.java
        ui
        AliasTableModel.java
        CertificateView.java
        DriverTableModel.java
        DriversView.java
        util
        DriverConfiguration.java
        Encoding.java
        OptionsParamExperimentalSliSupport.java
  - org
- test
  - ch
    - csnc
      - extension
        httpclient
        AliasCertificateUnitTest.java
        AliasKeyManagerUnitTest.java
        PKCS11ConfigurationUnitTest.java
        SSLContextManagerUnitTest.java
        util
        EncodingUnitTest.java
  - org
    - apache
      - commons
        httpclient
        HttpMethodBaseUnitTest.java
    - parosproxy
      - paros
        CommandLineUnitTest.java
        common
        AbstractParamUnitTest.java
        core
        scanner
        AbstractPluginUnitTest.java
        KbUnitTest.java
        NameValuePairUnitTest.java
        PluginFactoryUnitTest.java
        PluginTestUtils.java
        UtilUnitTest.java
        VariantCookieUnitTest.java
        VariantHeaderUnitTest.java
        VariantODataUnitTest.java
        model
        FileCopierUnitTest.java
        network
        HttpBodyUnitTest.java
        HttpRequestHeaderUnitTest.java
        HttpResponseHeaderUnitTest.java
    - zaproxy
      - zap
        VersionUnitTest.java
        WithConfigsTest.java
        authentication
        AuthenticationMethodIndicatorsUnitTest.java
        UsernamePasswordAuthenticationCredentialsUnitTest.java
        control
        AddOnCollectionUnitTest.java
        AddOnUnitTest.java
        ZapReleaseComparitorUnitTest.java
        ZapReleaseUnitTest.java
        extension
        alert
        ExtensionAlertUnitTest.java
        api
        APIUnitTest.java
        ApiResponseConversionUtilsUnitTest.java
        OptionsParamApiUnitTest.java
        authorization
        BasicAuthorizationDetectionMethodUnitTest.java
        brk
        impl
        http
        HttpBreakpointManagementDaemonImplUnitTest.java
        dynssl
        SslCertificateUtilsUnitTest.java
        ext
        ExtensionParamUnitTest.java
        httppanel
        view
        hex
        HttpPanelHexModelUnitTest.java
        util
        HttpTextViewUtilsUnitTest.java
        lang
        LangImporterUnitTest.java
        pscan
        PluginPassiveScannerUnitTest.java
        ruleconfig
        RuleConfigParamUnitTest.java
        model
        ContextUnitTest.java
        SessionUtilsUnitTest.java
        StandardParameterParserUnitTest.java
        VulnerabilitiesLoaderUnitTest.java
        network
        HttpBodyTestUtils.java
        HttpResponseBodyUnitTest.java
        spider
        URLCanonicalizerUnitTest.java
        URLResolverRfc1808ExamplesUnitTest.java
        URLResolverUnitTest.java
        filters
        DefaultFetchFilterUnitTest.java
        HttpPrefixFetchFilterUnitTest.java
        parser
        SpiderHtmlFormParserUnitTest.java
        SpiderHtmlParserUnitTest.java
        SpiderParserTestUtils.java
        SpiderSitemapXMLParserUnitTest.java
        SpiderTextParserUnitTest.java
        users
        UserUnitTest.java
        UsersTableModelUnitTest.java
        utils
        ApiUtilsUnitTest.java
        BoyerMooreMatcherUnitTest.java
        ByteBuilderUnitTest.java
        HirshbergMatcherUnitTest.java
        LocaleUtilsUnitTest.java
        XMLStringUtilUnitTest.java
        view
        AbstractMultipleOptionsBaseTableModelUnitTest.java
        JCheckBoxTreeUnitTest.java
        LayoutHelperUnitTest.java
        ListModelTestUtils.java
        TableModelTestUtils.java
        widgets
        UsersListModelUnitTest.java

/*
 * Zed Attack Proxy (ZAP) and its related class files.
 * 
 * ZAP is an HTTP/HTTPS proxy for assessing web application security.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); 
 * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at 
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0 
 *   
 * Unless required by applicable law or agreed to in writing, software 
 * distributed under the License is distributed on an "AS IS" BASIS, 
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and 
 * limitations under the License. 
 */
package org.zaproxy.zap.spider;

import java.net.CookieManager;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.apache.log4j.Logger;
import org.parosproxy.paros.model.Model;
import org.parosproxy.paros.network.ConnectionParam;
import org.parosproxy.paros.network.HttpMessage;
import org.parosproxy.paros.network.HttpRequestHeader;
import org.parosproxy.paros.network.HttpSender;
import org.zaproxy.zap.extension.spider.ExtensionSpider;
import org.zaproxy.zap.model.Context;
import org.zaproxy.zap.spider.filters.DefaultFetchFilter;
import org.zaproxy.zap.spider.filters.DefaultParseFilter;
import org.zaproxy.zap.spider.filters.FetchFilter;
import org.zaproxy.zap.spider.filters.FetchFilter.FetchStatus;
import org.zaproxy.zap.spider.filters.ParseFilter;
import org.zaproxy.zap.spider.parser.SpiderParser;
import org.zaproxy.zap.users.User;

/**
 * The Class Spider.
 */
public class Spider {

	/** The spider parameters. */
	private SpiderParam spiderParam;

	/** The connection parameters. */
	private ConnectionParam connectionParam;

	/** The model. */
	private Model model;

	/** The listeners for Spider related events. */
	private List<SpiderListener> listeners;

	/** If the spider is currently paused. */
	private volatile boolean paused;

	/** The the spider is currently stopped. */
	private volatile boolean stopped;

	/** The pause lock, used for locking access to the "paused" variable. */
	private ReentrantLock pauseLock = new ReentrantLock();

	/** The controller that manages the spidering process. */
	private SpiderController controller;

	/**
	 * The condition that is used for the threads in the pool to wait on, when the Spider crawling
	 * is paused. When the Spider is resumed, all the waiting threads are awakened.
	 */
	private Condition pausedCondition = pauseLock.newCondition();

	/** The thread pool for spider workers. */
	private ExecutorService threadPool;

	/** The default fetch filter. */
	private DefaultFetchFilter defaultFetchFilter;
	
	/** The seed list. */
	private LinkedHashSet<URI> seedList;
	
	/** The extension. */
	private ExtensionSpider extension;

	/** The Constant log. */
	private static final Logger log = Logger.getLogger(Spider.class);

	/** The HTTP sender used to effectively send the data. */
	private HttpSender httpSender;

	/** The count of the tasks finished. */
	private int tasksDoneCount;

	/** The total count of all the submitted tasks. */
	private int tasksTotalCount;

	/** The cookie manager. */
	private CookieManager cookieManager;
	
	/** The scan context. If null, the scan is not performed in a context. */
	private Context scanContext;

	/** The scan user. */
	private User scanUser;
	
	/** The time the scan was started */
	private long timeStarted;

	/**
	 * The initialized marks if the spidering process is completely started. It solves the problem
	 * when the first task is processed and the process is finished before the other seeds are
	 * added.
	 */
	private boolean initialized;

	/**	we do not want to recurse into an SVN folder, or a subfolder of an SVN folder, if one was created from a previous Spider run */
	private static final Pattern svnUrlPattern = Pattern.compile("\\.svn/"); //case sensitive

	/**	we do not want to recurse into a Git folder, or a subfolder of a Git folder, if one was created from a previous Spider run */
	private static final Pattern gitUrlPattern = Pattern.compile("\\.git/"); //case sensitive

	private final String id;

	/**
	 * Instantiates a new spider.
	 *
	 * @param extension the extension
	 * @param spiderParam the spider param
	 * @param connectionParam the connection param
	 * @param model the model
	 * @param scanContext if a scan context is set, only URIs within the context are fetched and processed
	 * @deprecated (2.6.0) Use {@link #Spider(String, ExtensionSpider, SpiderParam, ConnectionParam, Model, Context)}
	 *             instead.
	 */
	@Deprecated
	public Spider(ExtensionSpider extension, SpiderParam spiderParam, ConnectionParam connectionParam,
			Model model, Context scanContext) {
		this("?", extension, spiderParam, connectionParam, model, scanContext);
	}

	/**
	 * Constructs a {@code Spider} with the given data.
	 * 
	 * @param id the ID of the spider, usually a unique integer
	 * @param extension the extension
	 * @param spiderParam the spider param
	 * @param connectionParam the connection param
	 * @param model the model
	 * @param scanContext if a scan context is set, only URIs within the context are fetched and processed
	 * @since 2.6.0
	 */
	public Spider(String id, ExtensionSpider extension, SpiderParam spiderParam, ConnectionParam connectionParam,
			Model model, Context scanContext) {
		super();
		log.info("Spider initializing...");
		this.id = id;
		this.spiderParam = spiderParam;
		this.connectionParam = connectionParam;
		this.model = model;
		this.extension = extension;
		this.controller = new SpiderController(this, extension.getCustomParsers());
		this.listeners = new LinkedList<>();
		this.seedList = new LinkedHashSet<>();
		this.cookieManager = new CookieManager();
		this.scanContext = scanContext;
		
		init();
	}

	/**
	 * Initialize the spider.
	 */
	private void init() {
		this.paused = false;
		this.stopped = true;
		this.tasksDoneCount = 0;
		this.tasksTotalCount = 0;
		this.initialized = false;

		// Add a default fetch filter and any custom ones
		defaultFetchFilter = new DefaultFetchFilter();
		this.addFetchFilter(defaultFetchFilter);
		
		for (FetchFilter filter : extension.getCustomFetchFilters()) {
			this.addFetchFilter(filter);
		}

		// Add a default parse filter and any custom ones
		this.addParseFilter(new DefaultParseFilter());
		for (ParseFilter filter : extension.getCustomParseFilters())
			this.addParseFilter(filter);
		
		// Add the scan context, if any
		defaultFetchFilter.setScanContext(this.scanContext);
		defaultFetchFilter.setDomainsAlwaysInScope(spiderParam.getDomainsAlwaysInScopeEnabled());

	}

	/* SPIDER Related */
	/**
	 * Adds a new seed for the Spider.
	 * 
	 * @param msg the message used for seed. The request URI is used from the Request Header
	 */
	public void addSeed(HttpMessage msg) {
		URI uri = msg.getRequestHeader().getURI();
		addSeed(uri);
	}

	/**
	 * Adds a new seed for the Spider.
	 * 
	 * @param uri the uri
	 */
	public void addSeed(URI uri) {
		// Update the scope of the spidering process
		String host = null;

		try {
			host = uri.getHost();
			defaultFetchFilter.addScopeRegex(host);
		} catch (URIException e) {
			log.error("There was an error while adding seed value: " + uri, e);
			return;
		}
		// Add the seed to the list -- it will be added to the task list only when the spider is
		// started
		this.seedList.add(uri);
		// Add the appropriate 'robots.txt' as a seed
		if (getSpiderParam().isParseRobotsTxt()) {
			addRootFileSeed(uri, "robots.txt");
		}
		// Add the appropriate 'sitemap.xml' as a seed
		if (getSpiderParam().isParseSitemapXml()) {
			addRootFileSeed(uri, "sitemap.xml");
		}
		// And add '.svn/entries' as a seed, for SVN based spidering
		if (getSpiderParam().isParseSVNEntries()) {
			addFileSeed(uri, ".svn/entries", svnUrlPattern);
			addFileSeed(uri, ".svn/wc.db", svnUrlPattern);
		}

		// And add '.git/index' as a seed, for Git based spidering
		if (getSpiderParam().isParseGit()) {
			addFileSeed(uri, ".git/index", gitUrlPattern);
		}

	}

	/**
	 * Adds a file seed, with the given file name, at the root of the base URI.
	 * <p>
	 * For example, with base URI as {@code http://example.com/some/path/file.html} and file name as {@code sitemap.xml} it's
	 * added the seed {@code http://example.com/sitemap.xml}.
	 *
	 * @param baseUri the base URI.
	 * @param fileName the file name.
	 */
	private void addRootFileSeed(URI baseUri, String fileName) {
		String seed = buildUri(baseUri.getScheme(), baseUri.getRawHost(), baseUri.getPort(), "/" + fileName);
		try {
			this.seedList.add(new URI(seed, true));
		} catch (Exception e) {
			log.warn("Error while creating [" + fileName + "] seed: " + seed, e);
		}
	}

	/**
	 * Creates a URI (string) with the given scheme, host, port and path. The port is only added if not the default for the
	 * given scheme.
	 *
	 * @param scheme the scheme, {@code http} or {@code https}.
	 * @param host the name of the host.
	 * @param port the port.
	 * @param path the path, should start with {@code /}.
	 * @return the URI with the provided components.
	 */
	private static String buildUri(String scheme, char[] host, int port, String path) {
		StringBuilder strBuilder = new StringBuilder(150);
		strBuilder.append(scheme).append("://").append(host);
		if (!isDefaultPort(scheme, port)) {
			strBuilder.append(':').append(port);
		}
		strBuilder.append(path);
		return strBuilder.toString();
	}

	/**
	 * Adds a file seed using the given base URI, file name and condition.
	 * <p>
	 * The file is added as part of the path, without existing file name. For example, with base URI as
	 * {@code http://example.com/some/path/file.html} and file name as {@code .git/index} it's added the seed
	 * {@code http://example.com/some/path/.git/index}.
	 * <p>
	 * If the given condition matches the base URI's path without the file name, the file seed is not added (this prevents
	 * adding the seed once again).
	 *
	 * @param baseUri the base URI to construct the file seed.
	 * @param fileName the name of the file seed.
	 * @param condition the condition to add the file seed.
	 */
	private void addFileSeed(URI baseUri, String fileName, Pattern condition) {
		String fullpath = baseUri.getEscapedPath();
		if (fullpath == null) {
			fullpath = "";
		}

		String name = baseUri.getEscapedName();
		if (name == null) {
			name = "";
		}

		String pathminusfilename = fullpath.substring(0, fullpath.lastIndexOf(name));
		if (pathminusfilename.isEmpty()) {
			pathminusfilename = "/";
		}

		if (condition.matcher(pathminusfilename).find()) {
			return;
		}

		String uri = buildUri(baseUri.getScheme(), baseUri.getRawHost(), baseUri.getPort(), pathminusfilename + fileName);
		try {
			this.seedList.add(new URI(uri, true));
		} catch (Exception e) {
			log.warn("Error while creating a seed URI for file [" + fileName + "] from [" + baseUri + "] using [" + uri + "]:",
					e);
		}
	}

	/**
	 * Tells whether or not the given port is the default for the given scheme.
	 * <p>
	 * Only intended to be used with HTTP/S schemes.
	 *
	 * @param scheme the scheme.
	 * @param port the port.
	 * @return {@code true} if the given port is the default for the given scheme, {@code false} otherwise.
	 */
	private static boolean isDefaultPort(String scheme, int port) {
		if (port == -1) {
			return true;
		}

		if ("http".equalsIgnoreCase(scheme)) {
			return port == 80;
		}

		if ("https".equalsIgnoreCase(scheme)) {
			return port == 443;
		}

		return false;
	}

	/**
	 * Sets the exclude list which contains a List of strings, defining the uris that should be
	 * excluded.
	 * 
	 * @param excludeList the new exclude list
	 */
	public void setExcludeList(List<String> excludeList) {
		log.debug("New Exclude list: " + excludeList);
		defaultFetchFilter.setExcludeRegexes(excludeList);
	}

	/**
	 * Adds a new fetch filter to the spider.
	 * 
	 * @param filter the filter
	 */
	public void addFetchFilter(FetchFilter filter) {
		controller.addFetchFilter(filter);
	}

	/**
	 * Adds a new parse filter to the spider.
	 * 
	 * @param filter the filter
	 */
	public void addParseFilter(ParseFilter filter) {
		controller.addParseFilter(filter);
	}

	/**
	 * Gets the http sender. Can be called from the SpiderTask.
	 * 
	 * @return the http sender
	 */
	protected HttpSender getHttpSender() {
		return httpSender;
	}

	/**
	 * Gets the spider parameters. Can be called from the SpiderTask.
	 * 
	 * @return the spider parameters
	 */
	protected SpiderParam getSpiderParam() {
		return spiderParam;
	}

	protected ConnectionParam getConnectionParam() {
		return connectionParam;
	}

	/**
	 * Gets the controller.
	 * 
	 * @return the controller
	 */
	protected SpiderController getController() {
		return controller;
	}

	/**
	 * Gets the cookie manager.
	 * 
	 * @return the cookie manager
	 */
	protected CookieManager getCookieManager() {
		return cookieManager;
	}

	/**
	 * Gets the model.
	 * 
	 * @return the model
	 */
	protected Model getModel() {
		return this.model;
	}

	/**
	 * Submit a new task to the spidering task pool.
	 * 
	 * @param task the task
	 */
	protected synchronized void submitTask(SpiderTask task) {
		if (isStopped()) {
			log.debug("Submitting task skipped (" + task + ") as the Spider process is stopped.");
			return;
		}
		if (isTerminated()) {
			log.debug("Submitting task skipped (" + task + ") as the Spider process is terminated.");
			return;
		}
		this.tasksTotalCount++;
		try {
			this.threadPool.execute(task);
		} catch (RejectedExecutionException e) {
			if (log.isDebugEnabled()) {
				log.debug("Submitted task was rejected (" + task + "), spider state: [stopped=" + isStopped() + ", terminated="
						+ isTerminated() + "].");
			}
		}
	}

	/**
	 * Gets the extension.
	 *
	 * @return the extension
	 */
	protected ExtensionSpider getExtensionSpider() {
		return this.extension;
	}

	/* SPIDER PROCESS maintenance - pause, resume, shutdown, etc. */

	/**
	 * Starts the Spider crawling.
	 */
	public void start() {

		log.info("Starting spider...");
		
		this.timeStarted = System.currentTimeMillis();

		fetchFilterSeeds();

		// Check if seeds are available, otherwise the Spider will start, but will not have any
		// seeds and will not stop.
		if (seedList == null || seedList.isEmpty()) {
			log.warn("No seeds available for the Spider. Cancelling scan...");
			notifyListenersSpiderComplete(false);
			notifyListenersSpiderProgress(100, 0, 0);
			return;
		}

		if (scanUser != null)
			log.info("Scan will be performed from the point of view of User: " + scanUser.getName());

		this.controller.init();
		this.stopped = false;
		this.paused = false;
		this.initialized = false;

		// Initialize the thread pool
		this.threadPool = Executors.newFixedThreadPool(spiderParam.getThreadCount(),
				new SpiderThreadFactory("ZAP-SpiderThreadPool-" + id + "-thread-"));

		// Initialize the HTTP sender
		httpSender = new HttpSender(connectionParam, true, HttpSender.SPIDER_INITIATOR);
		// Do not follow redirections because the request is not updated, the redirections will be
		// handled manually.
		httpSender.setFollowRedirect(false);

		// Add the seeds
		for (URI uri : seedList) {
			if (log.isDebugEnabled()) {
				log.debug("Adding seed for spider: " + uri);
			}
			controller.addSeed(uri, HttpRequestHeader.GET);
		}
		// Mark the process as completely initialized
		initialized = true;
	}

	/**
	 * Filters the seed list using the current fetch filters, preventing any non-valid seed from being accessed.
	 * 
	 * @see #seedList
	 * @see FetchFilter
	 * @see SpiderController#getFetchFilters()
	 * @since 2.5.0
	 */
	private void fetchFilterSeeds() {
		if (seedList == null || seedList.isEmpty()) {
			return;
		}

		for (Iterator<URI> it = seedList.iterator(); it.hasNext();) {
			URI seed = it.next();
			for (FetchFilter filter : controller.getFetchFilters()) {
				FetchStatus filterReason = filter.checkFilter(seed);
				if (filterReason != FetchStatus.VALID) {
					if (log.isDebugEnabled()) {
						log.debug("Seed: " + seed + " was filtered with reason: " + filterReason);
					}
					it.remove();
					break;
				}
			}
		}
	}

	/**
	 * Stops the Spider crawling. Must not be called from any of the threads in the thread pool.
	 */
	public void stop() {
		if (stopped) {
			return;
		}
		this.stopped = true;
		log.info("Stopping spidering process by request.");
		
		if (this.paused) {
			// Have to resume first or we get a deadlock
			this.resume();
		}
		
		// Issue the shutdown command
		this.threadPool.shutdown();
		try {
			if (!this.threadPool.awaitTermination(2, TimeUnit.SECONDS)) {
				log.warn("Failed to await for all spider threads to stop in the given time (2s)...");
				for (Runnable task : this.threadPool.shutdownNow()) {
					((SpiderTask) task).cleanup();
				}
			}
		} catch (InterruptedException ignore) {
			log.warn("Interrupted while awaiting for all spider threads to stop...");
		}
		if (httpSender != null) {
			this.getHttpSender().shutdown();
			httpSender = null;
		}

		// Notify the controller to clean up memory
		controller.reset();
		this.threadPool = null;

		// Notify the listeners -- in the meanwhile
		notifyListenersSpiderComplete(false);
	}

	/**
	 * The Spidering process is complete.
	 */
	private void complete() {
		if (stopped) {
			return;
		}

		log.info("Spidering process is complete. Shutting down...");
		this.stopped = true;
		if (httpSender != null) {
			this.getHttpSender().shutdown();
			httpSender = null;
		}

		// Notify the controller to clean up memory
		controller.reset();

		// Issue the shutdown command on a separate thread, as the current thread is most likely one
		// from the pool
		new Thread(new Runnable() {
			@Override
			public void run() {
				if (threadPool != null) {
					threadPool.shutdown();
				}
				// Notify the listeners -- in the meanwhile
				notifyListenersSpiderComplete(true);
				controller.reset();
				threadPool = null;
			}
		}, "ZAP-SpiderShutdownThread-" + id).start();
	}

	/**
	 * Pauses the Spider crawling.
	 */
	public void pause() {
		pauseLock.lock();
		try {
			paused = true;
		} finally {
			pauseLock.unlock();
		}
	}

	/**
	 * Resumes the Spider crawling.
	 */
	public void resume() {
		pauseLock.lock();
		try {
			paused = false;
			// Wake up all threads that are currently paused
			pausedCondition.signalAll();
		} finally {
			pauseLock.unlock();
		}
	}

	/**
	 * Sets the spider so it will scan from the point of view of a user.
	 *
	 * @param user the user to be scanned as
	 */
	public void setScanAsUser(User user) {
		this.scanUser = user;
	}
	
	/**
	 * Gets the user that will be used in the scanning.
	 *
	 * @return the scan user
	 */
	protected User getScanUser(){
		return this.scanUser;
	}

	
	
	/**
	 * This method is run by each thread in the Thread Pool before the task execution. Particularly,
	 * it checks if the Spidering process is paused and, if it is, it waits on the corresponding
	 * condition for the process to be resumed. Called from the SpiderTask.
	 */
	protected void preTaskExecution() {
		checkPauseAndWait();
	}

	/**
	 * This method is run by Threads in the ThreadPool and checks if the scan is paused and, if it
	 * is, waits until it's unpaused.
	 */
	protected void checkPauseAndWait() {
		pauseLock.lock();
		try {
			while (paused && ! stopped) {
				pausedCondition.await();
			}
		} catch (InterruptedException e) {
		} finally {
			pauseLock.unlock();
		}
	}

	/**
	 * This method is run by each thread in the Thread Pool after the task execution. Particularly,
	 * it notifies the listeners of the progress and checks if the scan is complete. Called from the
	 * SpiderTask.
	 */
	protected synchronized void postTaskExecution() {
		if (stopped) {
			// Stopped, so don't count the task(s) as done.
			// (worker threads call this method even if the task was not really executed.)
			return;
		}
		tasksDoneCount++;
		int percentageComplete = tasksDoneCount * 100 / tasksTotalCount;

		// Compute the progress and notify the listeners
		this.notifyListenersSpiderProgress(percentageComplete, tasksDoneCount, tasksTotalCount - tasksDoneCount);

		// Check for ending conditions
		if (tasksDoneCount == tasksTotalCount && initialized) {
			this.complete();
		}
	}

	/**
	 * Checks if is paused.
	 * 
	 * @return true, if is paused
	 */
	public boolean isPaused() {
		return this.paused;
	}

	/**
	 * Checks if is stopped, i.e. a shutdown was issued or it is not running.
	 * 
	 * @return true, if is stopped
	 */
	public boolean isStopped() {
		if (! stopped && this.spiderParam.getMaxDuration() > 0) {
			// Check to see if the scan has exceeded the specified maxDuration
			if (TimeUnit.MILLISECONDS.toMinutes(System.currentTimeMillis() - this.timeStarted) > 
					this.spiderParam.getMaxDuration()) {
				log.info("Spidering process has exceeded maxDuration of " + this.spiderParam.getMaxDuration() + " minute(s)");
				this.complete();
			}
		}
		return stopped;
	}

	/**
	 * Checks if is terminated.
	 * 
	 * @return true, if is terminated
	 */
	public boolean isTerminated() {
		return threadPool.isTerminated();
	}

	/* LISTENERS SECTION */

	/**
	 * Adds a new spider listener.
	 * 
	 * @param listener the listener
	 */
	public void addSpiderListener(SpiderListener listener) {
		this.listeners.add(listener);
	}

	/**
	 * Removes a spider listener.
	 * 
	 * @param listener the listener
	 */
	public void removeSpiderListener(SpiderListener listener) {
		this.listeners.remove(listener);
	}

	/**
	 * Notifies all the listeners regarding the spider progress.
	 * 
	 * @param percentageComplete the percentage complete
	 * @param numberCrawled the number of pages crawled
	 * @param numberToCrawl the number of pages left to crawl
	 */
	protected synchronized void notifyListenersSpiderProgress(int percentageComplete, int numberCrawled,
			int numberToCrawl) {
		for (SpiderListener l : listeners) {
			l.spiderProgress(percentageComplete, numberCrawled, numberToCrawl);
		}
	}

	/**
	 * Notifies the listeners regarding a found uri.
	 * 
	 * @param uri the uri
	 * @param method the method used for fetching the resource
	 * @param status the {@link FetchStatus} stating if this uri will be processed, and, if not,
	 *            stating the reason of the filtering
	 */
	protected synchronized void notifyListenersFoundURI(String uri, String method, FetchStatus status) {
		for (SpiderListener l : listeners) {
			l.foundURI(uri, method, status);
		}
	}

	/**
	 * Notifies the listeners regarding a read uri.
	 * 
	 * @param msg the message
	 */
	protected synchronized void notifyListenersReadURI(HttpMessage msg) {
		for (SpiderListener l : listeners) {
			l.readURI(msg);
		}
	}

	/**
	 * Notifies the listeners that the spider is complete.
	 * 
	 * @param successful {@code true} if the spider completed successfully (e.g. was not stopped), {@code false} otherwise
	 */
	protected synchronized void notifyListenersSpiderComplete(boolean successful) {
		for (SpiderListener l : listeners) {
			l.spiderComplete(successful);
		}
	}

	public void addCustomParser(SpiderParser sp) {
		this.controller.addSpiderParser(sp);
	}

	private static class SpiderThreadFactory implements ThreadFactory {

		private final AtomicInteger threadNumber;
		private final String namePrefix;
		private final ThreadGroup group;

		public SpiderThreadFactory(String namePrefix) {
			threadNumber = new AtomicInteger(1);
			this.namePrefix = namePrefix;
			SecurityManager s = System.getSecurityManager();
			group = (s != null) ? s.getThreadGroup() : Thread.currentThread().getThreadGroup();
		}

		@Override
		public Thread newThread(Runnable r) {
			Thread t = new Thread(group, r, namePrefix + threadNumber.getAndIncrement(), 0);
			if (t.isDaemon()) {
				t.setDaemon(false);
			}
			if (t.getPriority() != Thread.NORM_PRIORITY) {
				t.setPriority(Thread.NORM_PRIORITY);
			}
			return t;
		}
	}
}