/*
* Zed Attack Proxy (ZAP) and its related class files.
*
* ZAP is an HTTP/HTTPS proxy for assessing web application security.
*
* Copyright 2016 The ZAP Development Team
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.zaproxy.zap.spider.filters;
import java.util.Arrays;
import java.util.Locale;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.apache.log4j.Logger;
/**
 * A {@code FetchFilter} that filters based on a HTTP or HTTPS {@code URI}.
 * <p>
 * The filtered {@code URI}s are required to start with the {@code URI} (the prefix) to be considered valid. The scheme
 * and the host are compared in lowercase, the port is compared after replacing an absent port with the default of the
 * scheme, and the raw path is compared character by character.
 *
 * @since 2.5.0
 * @see #checkFilter(URI)
 */
public class HttpPrefixFetchFilter extends FetchFilter {

    private static final Logger LOGGER = Logger.getLogger(HttpPrefixFetchFilter.class);

    /** The normalised form of HTTP scheme, that is, all letters lowercase. */
    private static final String HTTP_SCHEME = "http";

    /** The normalised form of HTTPS scheme, that is, all letters lowercase. */
    private static final String HTTPS_SCHEME = "https";

    /** The port number that indicates that a port is the default of a scheme. */
    private static final int DEFAULT_PORT = -1;

    /** The port number that indicates that a port is of an unknown scheme (that is, non HTTP and HTTPS). */
    private static final int UNKNOWN_PORT = -2;

    /** The default port number of HTTP scheme. */
    private static final int DEFAULT_HTTP_PORT = 80;

    /** The default port number of HTTPS scheme. */
    private static final int DEFAULT_HTTPS_PORT = 443;

    /** The (normalised) scheme used for filtering. Never {@code null}. */
    private final String scheme;

    /** The (normalised) host used for filtering. Never {@code null}. */
    private final String host;

    /** The (normalised) port used for filtering. */
    private final int port;

    /** A private copy of the raw path used for filtering. Might be {@code null}. */
    private final char[] path;

    /**
     * Constructs a {@code HttpPrefixFetchFilter} using the given {@code URI} as prefix.
     * <p>
     * The user info, query component and fragment of the given {@code URI} are discarded. The scheme and domain
     * comparisons are done in a case insensitive way while the path component comparison is case sensitive.
     * <p>
     * The path of the given {@code URI} is copied, later changes to the array obtained from the {@code URI} do not affect
     * this filter.
     *
     * @param prefix the {@code URI} that will be used as prefix
     * @throws IllegalArgumentException if any of the following conditions is {@code true}:
     *             <ul>
     *             <li>The given {@code prefix} is {@code null};</li>
     *             <li>The given {@code prefix} has {@code null} scheme;</li>
     *             <li>The scheme of the given {@code prefix} is not HTTP or HTTPS;</li>
     *             <li>The given {@code prefix} has {@code null} host;</li>
     *             <li>The given {@code prefix} has malformed host.</li>
     *             </ul>
     */
    public HttpPrefixFetchFilter(URI prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("Parameter prefix must not be null.");
        }

        char[] rawScheme = prefix.getRawScheme();
        if (rawScheme == null) {
            throw new IllegalArgumentException("Parameter prefix must have a scheme.");
        }
        String normalisedScheme = normalisedScheme(rawScheme);
        if (!isHttpOrHttps(normalisedScheme)) {
            throw new IllegalArgumentException("The prefix's scheme must be HTTP or HTTPS.");
        }
        scheme = normalisedScheme;

        if (prefix.getRawHost() == null) {
            throw new IllegalArgumentException("Parameter prefix must have a host.");
        }
        try {
            host = normalisedHost(prefix);
        } catch (URIException e) {
            throw new IllegalArgumentException("Failed to obtain the host from the prefix:", e);
        }

        port = normalisedPort(scheme, prefix.getPort());

        // Copy the array, it was obtained from a mutable object and char arrays can be changed in place, the filter
        // must keep matching against the path it was created with.
        char[] rawPath = prefix.getRawPath();
        path = rawPath == null ? null : Arrays.copyOf(rawPath, rawPath.length);
    }

    /**
     * Returns the normalised form of the given {@code scheme}.
     * <p>
     * The normalisation process consists in converting the scheme to lowercase. An empty {@code String} is returned if the
     * given {@code scheme} is {@code null}.
     *
     * @param scheme the scheme that will be normalised, might be {@code null}
     * @return a {@code String} with the normalised scheme, never {@code null}
     * @see URI#getRawScheme()
     */
    private static String normalisedScheme(char[] scheme) {
        if (scheme == null) {
            return "";
        }
        // Locale.ROOT keeps the conversion independent of the default locale.
        return new String(scheme).toLowerCase(Locale.ROOT);
    }

    /**
     * Tells whether or not the given {@code scheme} is HTTP or HTTPS.
     *
     * @param scheme the normalised scheme, might be {@code null}
     * @return {@code true} if the {@code scheme} is HTTP or HTTPS, {@code false} otherwise
     */
    private static boolean isHttpOrHttps(String scheme) {
        return isHttp(scheme) || isHttps(scheme);
    }

    /**
     * Tells whether or not the given {@code scheme} is HTTP.
     *
     * @param scheme the normalised scheme, might be {@code null}
     * @return {@code true} if the {@code scheme} is HTTP, {@code false} otherwise
     */
    private static boolean isHttp(String scheme) {
        return HTTP_SCHEME.equals(scheme);
    }

    /**
     * Tells whether or not the given {@code scheme} is HTTPS.
     *
     * @param scheme the normalised scheme, might be {@code null}
     * @return {@code true} if the {@code scheme} is HTTPS, {@code false} otherwise
     */
    private static boolean isHttps(String scheme) {
        return HTTPS_SCHEME.equals(scheme);
    }

    /**
     * Returns the normalised form of the host of the given {@code uri}.
     * <p>
     * The normalisation process consists in converting the host to lowercase. An empty {@code String} is returned if the
     * raw host of the given {@code uri} is {@code null}.
     *
     * @param uri the URI whose host will be extracted and normalised
     * @return a {@code String} with the host normalised, never {@code null}
     * @throws URIException if the host of the given {@code uri} is malformed
     */
    private static String normalisedHost(URI uri) throws URIException {
        if (uri.getRawHost() == null) {
            return "";
        }
        return uri.getHost().toLowerCase(Locale.ROOT);
    }

    /**
     * Returns the normalised form of the given {@code port}, based on the given {@code scheme}.
     * <p>
     * If the port is non-default (as given by {@link #DEFAULT_PORT}), it's immediately returned. Otherwise, 80 and 443 are
     * returned for schemes HTTP and HTTPS, respectively, and {@link #UNKNOWN_PORT} is returned for any other scheme.
     *
     * @param scheme the (normalised) scheme of the URI where the port was defined
     * @param port the port to normalise
     * @return the normalised port
     * @see #normalisedScheme(char[])
     * @see URI#getPort()
     */
    private static int normalisedPort(String scheme, int port) {
        if (port != DEFAULT_PORT) {
            return port;
        }

        if (isHttp(scheme)) {
            return DEFAULT_HTTP_PORT;
        }

        if (isHttps(scheme)) {
            return DEFAULT_HTTPS_PORT;
        }

        return UNKNOWN_PORT;
    }

    /**
     * Gets the prefix normalised, as it is used to filter the {@code URI}s.
     * <p>
     * The port is included only if it's not the default port of the scheme, the path only if the prefix has one.
     *
     * @return a {@code String} with the prefix normalised
     * @see #checkFilter(URI)
     */
    public String getNormalisedPrefix() {
        StringBuilder strBuilder = new StringBuilder();
        strBuilder.append(scheme).append("://").append(host);
        if (!isDefaultHttpOrHttpsPort(scheme, port)) {
            strBuilder.append(':').append(port);
        }
        if (path != null) {
            strBuilder.append(path);
        }
        return strBuilder.toString();
    }

    /**
     * Tells whether or not the given {@code port} is the default for the given {@code scheme}.
     * <p>
     * The method returns always {@code false} for non HTTP or HTTPS schemes.
     *
     * @param scheme the scheme of a URI, might be {@code null}
     * @param port the port of a URI
     * @return {@code true} if the {@code port} is the default for the given {@code scheme}, {@code false} otherwise
     */
    private static boolean isDefaultHttpOrHttpsPort(String scheme, int port) {
        return (port == DEFAULT_HTTP_PORT && isHttp(scheme)) || (port == DEFAULT_HTTPS_PORT && isHttps(scheme));
    }

    /**
     * Filters any URI that does not start with the defined prefix.
     * <p>
     * The port, scheme, host and path of the given {@code uri} are checked, in that order, against the ones of the prefix.
     *
     * @param uri the {@code URI} to check, a {@code null} {@code URI} is considered out of scope
     * @return {@code FetchStatus.VALID} if the {@code uri} starts with the {@code prefix}, {@code FetchStatus.OUT_OF_SCOPE}
     *         otherwise
     */
    @Override
    public FetchStatus checkFilter(URI uri) {
        if (uri == null) {
            return FetchStatus.OUT_OF_SCOPE;
        }

        String otherScheme = normalisedScheme(uri.getRawScheme());
        if (port != normalisedPort(otherScheme, uri.getPort())) {
            return FetchStatus.OUT_OF_SCOPE;
        }

        if (!scheme.equals(otherScheme)) {
            return FetchStatus.OUT_OF_SCOPE;
        }

        if (!hasSameHost(uri)) {
            return FetchStatus.OUT_OF_SCOPE;
        }

        if (!startsWith(uri.getRawPath(), path)) {
            return FetchStatus.OUT_OF_SCOPE;
        }

        return FetchStatus.VALID;
    }

    /**
     * Tells whether or not the given {@code uri} has the same host as required by this prefix.
     * <p>
     * For malformed hosts it returns always {@code false}, after logging a warning with the raw host.
     *
     * @param uri the {@code URI} whose host will be checked
     * @return {@code true} if the host is same, {@code false} otherwise
     */
    private boolean hasSameHost(URI uri) {
        try {
            return host.equals(normalisedHost(uri));
        } catch (URIException e) {
            // Log the host as text, Arrays.toString(char[]) would render it as a list of single characters.
            char[] rawHost = uri.getRawHost();
            LOGGER.warn("Failed to normalise host: " + (rawHost == null ? "null" : new String(rawHost)), e);
        }
        return false;
    }

    /**
     * Tells whether or not the given {@code array} starts with the given {@code prefix}.
     * <p>
     * The {@code prefix} might be {@code null} in which case it's considered that the {@code array} starts with the prefix.
     *
     * @param array the array that will be tested if starts with the prefix, might be {@code null}
     * @param prefix the array used as prefix, might be {@code null}
     * @return {@code true} if the {@code array} starts with the {@code prefix}, {@code false} otherwise
     */
    private static boolean startsWith(char[] array, char[] prefix) {
        if (prefix == null) {
            return true;
        }

        if (array == null) {
            return false;
        }

        int length = prefix.length;
        if (array.length < length) {
            // The array is shorter than the prefix, it can't start with it.
            return false;
        }

        for (int i = 0; i < length; i++) {
            if (prefix[i] != array[i]) {
                return false;
            }
        }

        return true;
    }
}