/*
 * Zed Attack Proxy (ZAP) and its related class files.
 *
 * ZAP is an HTTP/HTTPS proxy for assessing web application security.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.zaproxy.zap.spider.filters;

import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.zaproxy.zap.model.Context;
import org.zaproxy.zap.spider.DomainAlwaysInScopeMatcher;

/**
 * The DefaultFetchFilter is an implementation of a FetchFilter that is the default for the spidering
 * process. Its filter rules are the following:
 * <ul>
 * <li>the resource protocol/scheme must be 'HTTP' or 'HTTPS';</li>
 * <li>the resource must be in the scan context, if one is set; otherwise its host must match one of the
 * scope regexes or one of the domains always in scope;</li>
 * <li>the resource must not be excluded by user request - exclude list.</li>
 * </ul>
 */
public class DefaultFetchFilter extends FetchFilter {

    /** The scope: regular expressions matched against the host of the checked URIs. */
    private final Set<String> scopes = new LinkedHashSet<>();

    /** The matchers of the domains that are in scope regardless of the scope regexes. */
    private List<DomainAlwaysInScopeMatcher> domainsAlwaysInScope = Collections.emptyList();

    /** The exclude list: regular expressions matched against the whole URI, may be {@code null}. */
    private List<String> excludeList = null;

    /** The scan context, {@code null} if the domain scope should be used instead. */
    private Context scanContext;

    /**
     * Checks whether the given URI should be fetched.
     * <p>
     * The checks are done in the following order: scheme, context (or domain scope when no context is
     * set) and, finally, the exclude list. A URI without host is reported as out of scope when the
     * domain scope is used.
     *
     * @param uri the URI to check
     * @return {@code FetchStatus.VALID} if the URI passed all the checks, otherwise the status of the
     *         first check that rejected it. {@code FetchStatus.OUT_OF_SCOPE} is also returned if the
     *         host could not be obtained from the URI.
     */
    @Override
    public FetchStatus checkFilter(URI uri) {
        log.debug("Checking: " + uri);

        // Protocol check
        String scheme = uri.getScheme();
        if (scheme == null || (!scheme.equalsIgnoreCase("http") && !scheme.equalsIgnoreCase("https"))) {
            return FetchStatus.ILLEGAL_PROTOCOL;
        }

        try {
            if (this.scanContext != null) {
                // Context check
                if (!this.scanContext.isInContext(uri.toString())) {
                    return FetchStatus.OUT_OF_CONTEXT;
                }
            } else {
                // Scope check. The host might be null (URI without authority), in which case
                // neither of the domain checks match and the URI is considered out of scope.
                String host = uri.getHost();
                if (!isDomainInScope(host) && !isDomainAlwaysInScope(host)) {
                    return FetchStatus.OUT_OF_SCOPE;
                }
            }

            // Check if any of the exclusion regexes match.
            if (isExcluded(uri.toString())) {
                return FetchStatus.USER_RULES;
            }
        } catch (URIException e) {
            log.warn("Error while fetching host for uri: " + uri, e);
            return FetchStatus.OUT_OF_SCOPE;
        }

        return FetchStatus.VALID;
    }

    /**
     * Tells whether or not the given URI is excluded.
     *
     * @param uri the URI to check
     * @return {@code true} if the whole URI matches at least one of the exclude regexes, {@code false}
     *         otherwise (including when no exclude list was set or it is empty).
     */
    private boolean isExcluded(String uri) {
        if (excludeList == null || excludeList.isEmpty()) {
            return false;
        }

        for (String ex : excludeList) {
            if (uri.matches(ex)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether or not the given domain is one of the domains in scope.
     *
     * @param domain the domain to check, might be {@code null}
     * @return {@code true} if it's a domain in scope, {@code false} otherwise (including when the
     *         domain is {@code null}).
     * @see #scopes
     * @see #isDomainAlwaysInScope(String)
     */
    private boolean isDomainInScope(String domain) {
        if (domain == null) {
            // No domain, nothing to match the scope regexes against.
            return false;
        }

        for (String scope : scopes) {
            if (domain.matches(scope)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether or not the given domain is one of the domains always in scope.
     *
     * @param domain the domain to check, might be {@code null}
     * @return {@code true} if it's a domain always in scope, {@code false} otherwise (including when
     *         the domain is {@code null}).
     * @see #domainsAlwaysInScope
     * @see #isDomainInScope(String)
     */
    private boolean isDomainAlwaysInScope(String domain) {
        if (domain == null) {
            // Do not hand a null domain to the matchers, a missing host is never in scope.
            return false;
        }

        for (DomainAlwaysInScopeMatcher domainInScope : domainsAlwaysInScope) {
            if (domainInScope.matches(domain)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Adds a new domain to the scope list of the spider process.
     * <p>
     * The given value is used as a regular expression that must match the whole host of the checked
     * URIs.
     *
     * @param scope the scope
     */
    public void addScopeRegex(String scope) {
        this.scopes.add(scope);
    }

    /**
     * Sets the domains that will be considered as always in scope.
     * <p>
     * A {@code null} or empty list clears the domains previously set. The given list is used as is,
     * no copy is made.
     *
     * @param domainsAlwaysInScope the list containing all domains that are always in scope.
     * @since 2.3.0
     */
    public void setDomainsAlwaysInScope(List<DomainAlwaysInScopeMatcher> domainsAlwaysInScope) {
        if (domainsAlwaysInScope == null || domainsAlwaysInScope.isEmpty()) {
            this.domainsAlwaysInScope = Collections.emptyList();
        } else {
            this.domainsAlwaysInScope = domainsAlwaysInScope;
        }
    }

    /**
     * Sets the regexes which are used for checking if an uri should be skipped.
     * <p>
     * Each regex must match the whole URI for it to be skipped. The given list is used as is, no copy
     * is made.
     *
     * @param excl the new exclude regexes, might be {@code null} to not exclude any URI
     */
    public void setExcludeRegexes(List<String> excl) {
        excludeList = excl;
    }

    /**
     * Sets the scan context. If set, only uris that are part of the context are
     * considered valid for fetching, the domain scope is not checked.
     *
     * @param scanContext the new scan context, might be {@code null} to use the domain scope instead
     */
    public void setScanContext(Context scanContext) {
        this.scanContext = scanContext;
    }
}