/* * Zed Attack Proxy (ZAP) and its related class files. * * ZAP is an HTTP/HTTPS proxy for assessing web application security. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.zaproxy.zap.spider.parser; import java.util.regex.Matcher; import java.util.regex.Pattern; import net.htmlparser.jericho.Source; import org.parosproxy.paros.network.HttpMessage; /** * The Class SpiderTextParser is used for parsing of simple text (non-HTML) files, gathering * resource urls from them. For example it is used for parsing CSS, less, javascript files, * searching for urls. */ public class SpiderTextParser extends SpiderParser { /** The Constant urlPattern defining the pattern for an url. */ private static final Pattern patternURL = Pattern.compile("\\W(http(s?)://[^\\x00-\\x1f\"'\\s<>#()\\[\\]{}]+)", Pattern.CASE_INSENSITIVE); @Override public boolean parseResource(HttpMessage message, Source source, int depth) { log.debug("Parsing a non-HTML text resource."); String baseURL = message.getRequestHeader().getURI().toString(); // Use a simple pattern matcher to find urls Matcher matcher = patternURL.matcher(message.getResponseBody().toString()); while (matcher.find()) { String s = matcher.group(1); processURL(message, depth, s, baseURL); } return false; } @Override public boolean canParseResource(HttpMessage message, String path, boolean wasAlreadyConsumed) { // Fall-back parser - if it's a text, non-HTML response which has not already been processed return !wasAlreadyConsumed && message.getResponseHeader().isText() && !message.getResponseHeader().isHtml(); } }