/*
 * Zed Attack Proxy (ZAP) and its related class files.
 *
 * ZAP is an HTTP/HTTPS proxy for assessing web application security.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.zaproxy.zap.spider.parser;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.htmlparser.jericho.Source;

import org.apache.commons.lang.StringEscapeUtils;
import org.parosproxy.paros.network.HttpMessage;

/**
 * Spider parser for OData content served in Atom format.
 *
 * <p>Although the payload is XML, it is not parsed as XML: the response body is treated as plain
 * text and scanned with regular expressions (the original author notes that this approach is
 * derived from the SpiderTextParser).
 */
public class SpiderODataAtomParser extends SpiderParser {

    /**
     * Matches {@code href="..."} attributes. Group 1 is the attribute value, which may only be
     * made of word characters and the characters {@code ( ) ; & ' / , = -}; an empty value also
     * matches.
     */
    private static final Pattern PATTERN_URL = Pattern.compile("href=\\\"([\\w();&'/,=\\-]*)\\\"");

    /**
     * Matches a {@code base="http(s)://..."} attribute, e.g.
     * {@code xml:base="http://myserver:8001/remoting/myapp.svc/"}. Group 1 is the absolute URL.
     */
    private static final Pattern PATTERN_BASE =
            Pattern.compile("base=\"(http(s?)://[^\\x00-\\x1f\"'\\s<>#]+)\"");

    /**
     * Scans the response body for {@code href} attributes and hands every value found to
     * {@code processURL}, together with the base URL to resolve it against.
     *
     * @param message the message whose response body is scanned
     * @param source the parsed source of the response; not used by this parser
     * @param depth the current depth, passed through to {@code processURL}
     * @return {@code true} if at least one {@code href} was found, {@code false} otherwise
     */
    @Override
    public boolean parseResource(HttpMessage message, Source source, int depth) {
        log.debug("Parsing an OData Atom resource.");

        // The request URI is the default context for relative links
        final String requestUrl = message.getRequestHeader().getURI().toString();

        // The XML is deliberately handled as flat text and searched with regular expressions
        final String body = message.getResponseBody().toString();

        // A base attribute in the document, when present, overrides the request URI
        final String baseUrl = extractXmlBase(body, requestUrl);

        boolean linkSeen = false;
        final Matcher hrefMatcher = PATTERN_URL.matcher(body);
        while (hrefMatcher.find()) {
            final String link = StringEscapeUtils.unescapeXml(hrefMatcher.group(1));
            processURL(message, depth, link, baseUrl);
            linkSeen = true;
        }

        // The resource counts as consumed only when at least one link was found
        return linkSeen;
    }

    /**
     * Returns the XML-unescaped value of the first {@code base="..."} attribute found in the
     * given text, or the fallback when the text has no such attribute.
     *
     * @param body the text to search
     * @param fallbackUrl the value returned when no base attribute is found
     * @return the base URL to resolve links against
     */
    private static String extractXmlBase(String body, String fallbackUrl) {
        final Matcher baseMatcher = PATTERN_BASE.matcher(body);
        if (!baseMatcher.find()) {
            return fallbackUrl;
        }
        return StringEscapeUtils.unescapeXml(baseMatcher.group(1));
    }

    /**
     * Fallback parser: accepts a message only if no other parser has already handled it and its
     * response header reports XML content.
     *
     * @param message the message to check
     * @param path the resource path; not used by this parser
     * @param wasAlreadyParsed whether the resource was already parsed by another parser
     * @return {@code true} if this parser should parse the resource
     */
    @Override
    public boolean canParseResource(HttpMessage message, String path, boolean wasAlreadyParsed) {
        if (wasAlreadyParsed) {
            return false;
        }
        return message.getResponseHeader().isXml();
    }
}