/*
 * Zed Attack Proxy (ZAP) and its related class files.
 *
 * ZAP is an HTTP/HTTPS proxy for assessing web application security.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.zaproxy.zap.spider.filters;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.URIException;
import org.parosproxy.paros.network.HttpMessage;
import org.parosproxy.paros.network.HttpStatusCode;

/**
 * The DefaultParseFilter is an implementation of a {@link ParseFilter} that is the default for the
 * spidering process. Its filter rules are applied in the following order:
 * <ul>
 * <li>a resource whose request path ends in {@code /.svn/wc.db}, {@code /.svn/entries} or
 * {@code /.git/index} is never filtered, regardless of its size or content type.</li>
 * <li>the resource body must not exceed {@link #MAX_RESPONSE_BODY_SIZE} bytes, otherwise it's
 * probably a binary resource and it is filtered.</li>
 * <li>a redirection response is never filtered.</li>
 * <li>the resource must be of a parsable type (text, html, xml, javascript). Actually, the content
 * type should be text/...; anything else is filtered.</li>
 * </ul>
 */
public class DefaultParseFilter extends ParseFilter {

    /**
     * The Constant MAX_RESPONSE_BODY_SIZE defining the size of response body that is considered too
     * big for a parsable file.
     */
    public static final int MAX_RESPONSE_BODY_SIZE = 512000;

    /**
     * A pattern to match the SQLite based ".svn/wc.db" file name. The dot in "wc.db" is escaped so
     * that only a literal dot matches.
     */
    private static final Pattern SVN_SQLITE_FILENAME_PATTERN = Pattern.compile(".*/\\.svn/wc\\.db$");

    /**
     * A pattern to match the XML based ".svn/entries" file name.
     */
    private static final Pattern SVN_XML_FILENAME_PATTERN = Pattern.compile(".*/\\.svn/entries$");

    /**
     * A pattern to match the Git index file.
     */
    private static final Pattern GIT_FILENAME_PATTERN = Pattern.compile(".*/\\.git/index$");

    /**
     * Decides whether the given response should be excluded from parsing.
     *
     * @param responseMessage the message whose request URI, response header and response body are
     *            inspected
     * @return {@code true} if the response must not be parsed (body too large or not text),
     *         {@code false} if it should be handed to the parsers
     */
    @Override
    public boolean isFiltered(HttpMessage responseMessage) {

        // If it's a file ending in "/.svn/entries", "/.svn/wc.db" or "/.git/index", the SVN Entries
        // or Git parsers will process it regardless of type, and regardless of whether it exceeds
        // the file size restriction below.
        if (isVersionControlMetadata(responseMessage)) {
            return false;
        }

        // Check response body size
        if (responseMessage.getResponseBody().length() > MAX_RESPONSE_BODY_SIZE) {
            if (log.isDebugEnabled()) {
                log.debug("Resource too large: " + responseMessage.getRequestHeader().getURI());
            }
            return true;
        }

        // If it's a redirection, accept it, as the SpiderRedirectParser will process it
        if (HttpStatusCode.isRedirection(responseMessage.getResponseHeader().getStatusCode())) {
            return false;
        }

        // Check response type.
        if (!responseMessage.getResponseHeader().isText()) {
            if (log.isDebugEnabled()) {
                log.debug("Resource is not text: " + responseMessage.getRequestHeader().getURI());
            }
            return true;
        }

        return false;
    }

    /**
     * Tells whether the request path of the given message names one of the SVN or Git metadata
     * files matched by the filename patterns of this class.
     *
     * @param responseMessage the message whose request URI path is checked
     * @return {@code true} if the path matches one of the patterns, {@code false} otherwise,
     *         including when the path cannot be obtained from the URI
     */
    private boolean isVersionControlMetadata(HttpMessage responseMessage) {
        try {
            String path = responseMessage.getRequestHeader().getURI().getPath();
            // handle null paths
            if (path == null) {
                path = "";
            }
            return SVN_SQLITE_FILENAME_PATTERN.matcher(path).find()
                    || SVN_XML_FILENAME_PATTERN.matcher(path).find()
                    || GIT_FILENAME_PATTERN.matcher(path).find();
        } catch (URIException e) {
            // Give the remaining checks a chance to decide; pass the exception as the throwable
            // so that the stack trace is logged, not just its string form.
            log.error(e.getMessage(), e);
            return false;
        }
    }
}