/*
* Zed Attack Proxy (ZAP) and its related class files.
*
* ZAP is an HTTP/HTTPS proxy for assessing web application security.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.zaproxy.zap.spider.parser;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.htmlparser.jericho.Source;
import org.parosproxy.paros.network.HttpMessage;
/**
* The Class SpiderTextParser is used for parsing of simple text (non-HTML) files, gathering
* resource urls from them. For example it is used for parsing CSS, less, javascript files,
* searching for urls.
*/
public class SpiderTextParser extends SpiderParser {
/** The Constant urlPattern defining the pattern for an url. */
private static final Pattern patternURL = Pattern.compile("\\W(http(s?)://[^\\x00-\\x1f\"'\\s<>#()\\[\\]{}]+)", Pattern.CASE_INSENSITIVE);
@Override
public boolean parseResource(HttpMessage message, Source source, int depth) {
log.debug("Parsing a non-HTML text resource.");
String baseURL = message.getRequestHeader().getURI().toString();
// Use a simple pattern matcher to find urls
Matcher matcher = patternURL.matcher(message.getResponseBody().toString());
while (matcher.find()) {
String s = matcher.group(1);
processURL(message, depth, s, baseURL);
}
return false;
}
@Override
public boolean canParseResource(HttpMessage message, String path, boolean wasAlreadyConsumed) {
// Fall-back parser - if it's a text, non-HTML response which has not already been processed
return !wasAlreadyConsumed && message.getResponseHeader().isText() && !message.getResponseHeader().isHtml();
}
}